| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| |
| /*! |
| ****************************************************************************** |
| * \file ihevce_recur_bracketing.c |
| * |
| * \brief |
| * This file contains interface functions of recursive bracketing |
| * module |
| * \date |
| * 12/02/2012 |
| * |
| * \author |
| * Ittiam |
| * |
| * List of Functions |
| * |
| * |
| ****************************************************************************** |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <math.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_ipe_instr_set_router.h" |
| #include "ihevce_ipe_structs.h" |
| #include "ihevce_ipe_pass.h" |
| #include "ihevce_recur_bracketing.h" |
| #include "ihevce_nbr_avail.h" |
| #include "ihevc_common_tables.h" |
| #include "ihevce_decomp_pre_intra_structs.h" |
| #include "ihevce_decomp_pre_intra_pass.h" |
| |
| #include "cast_types.h" |
| #include "osal.h" |
| #include "osal_defaults.h" |
| |
| /*****************************************************************************/ |
| /* Constant Macros */ |
| /*****************************************************************************/ |
| #define IP_DBG_L1_l2 0 |
| #define CHILD_BIAS 12 |
| |
| /*****************************************************************************/ |
| /* Globals */ |
| /*****************************************************************************/ |
| extern pf_intra_pred g_apf_lum_ip[10]; |
| |
| extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES]; |
| |
| UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, |
| 6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, |
| 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 }; |
| |
| UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0, |
| 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, |
| 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 }; |
| |
/* Clears bit number `bit` of the lvalue `x` in place and yields the new value. */
/* Both arguments are parenthesised so that expressions such as `a & b` can be */
/* passed as `bit`; note that `x` is still evaluated twice.                     */
#define RESET_BIT(x, bit) ((x) = (x) & ~((WORD32)1 << (bit)))
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_update_cand_list \endif |
| * |
| * \brief |
| * Final Candidate list population, nbr flag andd nbr mode update function |
| * |
| * \param[in] ps_row_cu : pointer to cu analyse struct |
| * \param[in] ps_cu_node : pointer to cu node info buffer |
| * \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer |
| * \param[in] pu1_cand_mode_list : pointer to candidate list buffer |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_update_cand_list( |
| ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt) |
| { |
| WORD32 row, col, x, y, size; |
| |
| /* Candidate mode Update */ |
| (void)ps_ed_blk_l1; |
| /* Update CTB mode map for the finalised CU */ |
| x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; |
| y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
| size = ps_cu_node->u1_cu_size >> 2; |
| for(row = y; row < (y + size); row++) |
| { |
| for(col = x; col < (x + size); col++) |
| { |
| ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->best_mode; |
| } |
| } |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif |
| * |
| * \brief |
| * Mpm indx calc function based on left and top available modes |
| * |
| * \param[in] top_intra_mode : Top available intra mode |
| * \param[in] left_intra_mode : Left available intra mode |
| * \param[in] available_top : Top availability flag |
| * \param[in] available_left : Left availability flag |
| * \param[in] cu_pos_y : cu position wrt to CTB |
| * \param[in] mode_bits_cost : pointer to mode bits buffer |
| * \param[in] lambda : Lambda value (SAD/SATD) |
| * \param[in] cand_mode_list : pointer to candidate list buffer |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_intra_populate_mode_bits_cost_bracketing( |
| WORD32 top_intra_mode, |
| WORD32 left_intra_mode, |
| WORD32 available_top, |
| WORD32 available_left, |
| WORD32 cu_pos_y, |
| UWORD16 *mode_bits_cost, |
| UWORD16 *mode_bits, |
| WORD32 lambda, |
| WORD32 *cand_mode_list) |
| { |
| /* local variables */ |
| WORD32 i; |
| WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; |
| |
| UWORD16 one_bits_cost = |
| COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda |
| UWORD16 two_bits_cost = |
| COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda |
| UWORD16 five_bits_cost = |
| COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda |
| |
| for(i = 0; i < 35; i++) |
| { |
| mode_bits_cost[i] = five_bits_cost; |
| mode_bits[i] = 5; |
| } |
| |
| /* EIID: set availability flag to zero if modes are invalid. |
| Required since some CU's might be skipped (though available) |
| and their modes will be set to 255 (-1)*/ |
| if(35 < top_intra_mode || 0 > top_intra_mode) |
| available_top = 0; |
| if(35 < left_intra_mode || 0 > left_intra_mode) |
| available_left = 0; |
| |
| /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
| /* N = top */ |
| if(0 == available_top) |
| { |
| cand_intra_pred_mode_top = INTRA_DC; |
| } |
| /* for neighbour != INTRA, setting DC is done outside */ |
| else if(0 == cu_pos_y) /* It's on the CTB boundary */ |
| { |
| cand_intra_pred_mode_top = INTRA_DC; |
| } |
| else |
| { |
| cand_intra_pred_mode_top = top_intra_mode; |
| } |
| |
| /* N = left */ |
| if(0 == available_left) |
| { |
| cand_intra_pred_mode_left = INTRA_DC; |
| //cand_intra_pred_mode_left = cand_intra_pred_mode_top; |
| } |
| /* for neighbour != INTRA, setting DC is done outside */ |
| else |
| { |
| cand_intra_pred_mode_left = left_intra_mode; |
| } |
| |
| /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
| if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) |
| { |
| if(cand_intra_pred_mode_left < 2) |
| { |
| cand_mode_list[0] = INTRA_PLANAR; |
| cand_mode_list[1] = INTRA_DC; |
| cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ |
| } |
| else |
| { |
| cand_mode_list[0] = cand_intra_pred_mode_left; |
| cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); |
| cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); |
| } |
| } |
| else |
| { |
| if(0 == available_left) |
| { |
| cand_mode_list[0] = cand_intra_pred_mode_top; |
| cand_mode_list[1] = cand_intra_pred_mode_left; |
| } |
| else |
| { |
| cand_mode_list[0] = cand_intra_pred_mode_left; |
| cand_mode_list[1] = cand_intra_pred_mode_top; |
| } |
| if((cand_intra_pred_mode_left != INTRA_PLANAR) && |
| (cand_intra_pred_mode_top != INTRA_PLANAR)) |
| { |
| cand_mode_list[2] = INTRA_PLANAR; |
| } |
| else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) |
| { |
| cand_mode_list[2] = INTRA_DC; |
| } |
| else |
| { |
| cand_mode_list[2] = INTRA_ANGULAR(26); |
| } |
| } |
| mode_bits_cost[cand_mode_list[0]] = one_bits_cost; |
| mode_bits_cost[cand_mode_list[1]] = two_bits_cost; |
| mode_bits_cost[cand_mode_list[2]] = two_bits_cost; |
| |
| mode_bits[cand_mode_list[0]] = 2; |
| mode_bits[cand_mode_list[1]] = 3; |
| mode_bits[cand_mode_list[2]] = 3; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_pu_calc_4x4_blk \endif |
| * |
| * \brief |
| * 4x4 pu (8x8 CU) mode decision using step 8421 method |
| * |
| * \param[in] ps_cu_node : pointer to cu node info buffer |
| * \param[in] pu1_src : pointer to src pixels |
| * \param[in] src_stride : frm source stride |
| * \param[in] ref : pointer to reference pixels for prediction |
| * \param[in] cand_mode_list : pointer to candidate list buffer |
| * \param[in] best_costs_4x4 : pointer to 3 best cost buffer |
| * \param[in] best_modes_4x4 : pointer to 3 best mode buffer |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_pu_calc_4x4_blk( |
| ihevce_ipe_ctxt_t *ps_ctxt, |
| ihevce_ipe_cu_tree_t *ps_cu_node, |
| UWORD8 *pu1_src, |
| WORD32 src_stride, |
| UWORD8 *ref, |
| UWORD16 *mode_bits_cost, |
| WORD32 *best_costs_4x4, |
| UWORD8 *best_modes_4x4, |
| func_selector_t *ps_func_selector) |
| { |
| WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp; |
| WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out; |
| UWORD8 u1_use_satd = ps_ctxt->u1_use_satd; |
| UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on; |
| |
| WORD32 i, j = 0, i_end; |
| UWORD8 mode, best_amode = 255; |
| UWORD8 pred[16]; |
| |
| UWORD16 sad; |
| WORD32 sad_cost = 0; |
| WORD32 best_asad_cost = 0xFFFFF; |
| WORD32 temp; |
| UWORD8 modes_to_eval[5]; |
| WORD32 costs_4x4[5]; |
| UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; |
| |
| /* LO resolution hence low resolution disable */ |
| WORD32 u1_low_resol = 0; |
| UWORD8 au1_best_modes[1] = { 0 }; |
| WORD32 ai4_best_sad_costs[1] = { 0 }; |
| |
| WORD16 *pi2_tmp = &pi2_trans_tmp[0]; |
| |
| ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list = |
| &ps_ctxt->s_ipe_optimised_function_list; |
| |
| //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1; |
| //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit; |
| |
| for(i = 0; i < 5; i++) |
| { |
| costs_4x4[i] = MAX_INTRA_COST_IPE; |
| } |
| |
| ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes( |
| pu1_src, |
| src_stride, |
| ref, |
| mode_bits_cost, |
| au1_best_modes, |
| ai4_best_sad_costs, |
| u1_low_resol, |
| ps_ipe_optimised_function_list->pf_4x4_sad_computer); |
| |
| best_amode = au1_best_modes[0]; |
| best_asad_cost = ai4_best_sad_costs[0]; |
| |
| ASSERT(best_amode != 255); |
| /* Around best level 4 angular mode, search for best level 2 mode */ |
| modes_to_eval[0] = best_amode - 2; |
| modes_to_eval[1] = best_amode + 2; |
| i = 0; |
| i_end = 2; |
| if(best_amode == 2) |
| i = 1; |
| else if(best_amode == 34) |
| i_end = 1; |
| for(; i < i_end; i++) |
| { |
| mode = modes_to_eval[i]; |
| |
| g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
| |
| sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4); |
| |
| sad_cost = sad; |
| sad_cost += mode_bits_cost[mode]; |
| |
| if(sad_cost < best_asad_cost) |
| { |
| best_amode = mode; |
| best_asad_cost = sad_cost; |
| } |
| } |
| |
| /* Around best level 2 angular mode, search for best level 1 mode */ |
| /* Also evaluate for non-angular mode */ |
| |
| i = 0; |
| /*Level 1 refinement is disabled for ES preset */ |
| if(1 == u1_level_1_refine_on) |
| { |
| if(best_amode != 2) |
| modes_to_eval[i++] = best_amode - 1; |
| modes_to_eval[i++] = best_amode; |
| } |
| |
| modes_to_eval[i++] = 0; |
| modes_to_eval[i++] = 1; |
| |
| if(1 == u1_level_1_refine_on) |
| { |
| if(best_amode != 34) |
| modes_to_eval[i++] = best_amode + 1; |
| } |
| i_end = i; |
| i = 0; |
| |
| for(; i < i_end; i++) |
| { |
| mode = modes_to_eval[i]; |
| |
| g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
| |
| /* Hard coding to use SATD */ |
| if(u1_use_satd) |
| { |
| ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr( |
| pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0); |
| |
| sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4); |
| } |
| else |
| { |
| sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer( |
| pu1_src, &pred[0], src_stride, 4); |
| } |
| sad_cost = sad; |
| sad_cost += mode_bits_cost[mode]; |
| |
| costs_4x4[i] = sad_cost; |
| } |
| |
| /* Arrange the reference array in ascending order */ |
| for(i = 0; i < (i_end - 1); i++) |
| { |
| for(j = i + 1; j < i_end; j++) |
| { |
| if(costs_4x4[i] > costs_4x4[j]) |
| { |
| temp = costs_4x4[i]; |
| costs_4x4[i] = costs_4x4[j]; |
| costs_4x4[j] = temp; |
| |
| temp = modes_4x4[i]; |
| modes_4x4[i] = modes_4x4[j]; |
| modes_4x4[j] = temp; |
| } |
| } |
| } |
| for(i = 0; i < 3; i++) |
| { |
| best_costs_4x4[i] = costs_4x4[i]; |
| best_modes_4x4[i] = modes_to_eval[modes_4x4[i]]; |
| } |
| |
| { |
| ps_cu_node->best_mode = best_modes_4x4[0]; |
| ps_cu_node->best_cost = best_costs_4x4[0]; |
| ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode]; |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_pu_calc_8x8_blk \endif |
| * |
| * \brief |
| * 4x4 pu (8x8 CU) mode decision loop using step 8421 method |
| * |
| * \param[in] ps_curr_src : pointer to src pixels struct |
| * \param[in] ps_ctxt : pointer to IPE context struct |
| * \param[in] ps_cu_node : pointer to cu node info buffer |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_pu_calc_8x8_blk( |
| iv_enc_yuv_buf_t *ps_curr_src, |
| ihevce_ipe_ctxt_t *ps_ctxt, |
| ihevce_ipe_cu_tree_t *ps_cu_node, |
| func_selector_t *ps_func_selector) |
| { |
| WORD32 i, j; |
| WORD32 nbr_flags; |
| nbr_avail_flags_t s_nbr; |
| WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1; |
| |
| UWORD8 *pu1_src_4x4; |
| WORD32 xA, xB, yA, yB; |
| //WORD32 x, y, size; |
| WORD32 top_intra_mode; |
| WORD32 left_intra_mode; |
| // WORD8 *top_intra_mode_ptr; |
| // WORD8 *left_intra_mode_ptr; |
| UWORD8 *pu1_orig; |
| WORD32 src_strd = ps_curr_src->i4_y_strd; |
| |
| WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1; |
| WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1; |
| ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| ihevc_intra_pred_luma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + |
| ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) + |
| (ps_cu_node->ps_parent->u2_x0 << 3); |
| for(i = 0; i < 2; i++) |
| { |
| for(j = 0; j < 2; j++) |
| { |
| WORD32 cand_mode_list[3]; |
| pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size); |
| /* get the neighbour availability flags */ |
| nbr_flags = ihevce_get_nbr_intra( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x + ((j) * (trans_size >> 2)), |
| cu_pos_y + ((i) * (trans_size >> 2)), |
| trans_size >> 2); |
| |
| /* call the function which populates sad cost for all the modes */ |
| xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j; |
| yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; |
| xB = xA + 1; |
| yB = yA - 1; |
| left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; |
| top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; |
| |
| ihevce_intra_populate_mode_bits_cost_bracketing( |
| top_intra_mode, |
| left_intra_mode, |
| s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| ps_cu_node->ps_parent->u2_y0, |
| &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], |
| &ps_ctxt->au2_mode_bits_8x8_pu[0], |
| ps_ctxt->i4_ol_sad_lambda, |
| cand_mode_list); |
| |
| /* call the function which populates ref data for intra predicion */ |
| ihevc_intra_pred_luma_ref_substitution_fptr( |
| pu1_src_4x4 - src_strd - 1, |
| pu1_src_4x4 - src_strd, |
| pu1_src_4x4 - 1, |
| src_strd, |
| 4, |
| nbr_flags, |
| &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], |
| 0); |
| |
| ihevce_pu_calc_4x4_blk( |
| ps_ctxt, |
| ps_cu_node->ps_sub_cu[(i * 2) + j], |
| pu1_src_4x4, |
| src_strd, |
| &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], |
| &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], |
| &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0], |
| ps_func_selector); |
| |
| /*&au4_cost_4x4[i*2 + j][0], |
| &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition |
| |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x + ((j) * (trans_size >> 2)), |
| cu_pos_y + ((i) * (trans_size >> 2)), |
| (trans_size >> 2), |
| 1); |
| |
| xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j; |
| yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; |
| ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode; |
| ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost = |
| ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode]; |
| } |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_bracketing_analysis \endif |
| * |
| * \brief |
| * Interface function that evaluates MAX cu and MAX - 1 cu, with MAX cu size |
| * info decided coarse resolution mode decision. Compares the SATD/SAD cost btwn |
| * 2 CUS and determines the actual CU size and best 3 modes to be given to rdopt |
| * |
| * \param[in] ps_ctxt : pointer to IPE context struct |
| * \param[in] ps_cu_node : pointer to cu node info buffer |
| * \param[in] ps_curr_src : pointer to src pixels struct |
| * \param[in] ps_ctb_out : pointer to ip ctb out struct |
| * \param[in] ps_row_cu : pointer to cu analyse struct |
| * \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct |
| * \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct |
| * \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_bracketing_analysis( |
| ihevce_ipe_ctxt_t *ps_ctxt, |
| ihevce_ipe_cu_tree_t *ps_cu_node, |
| iv_enc_yuv_buf_t *ps_curr_src, |
| ctb_analyse_t *ps_ctb_out, |
| //cu_analyse_t *ps_row_cu, |
| ihevce_ed_blk_t *ps_ed_l1_ctb, |
| ihevce_ed_blk_t *ps_ed_l2_ctb, |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
| ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb) |
| { |
| WORD32 cu_pos_x = 0; |
| WORD32 cu_pos_y = 0; |
| |
| UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width; |
| UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height; |
| WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3); |
| WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3); |
| |
| ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb; |
| ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb; |
| |
| WORD32 i; |
| WORD32 cand_mode_list[3]; |
| //cu_analyse_t *ps_curr_cu = ps_row_cu; |
| WORD32 blk_cnt = 0; |
| WORD32 j = 0; |
| WORD32 merge_32x32_l1, merge_32x32_l2; |
| |
| WORD32 i4_skip_intra_eval_32x32_l1; |
| //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block |
| |
| WORD32 parent_cost = 0; |
| WORD32 child_cost[4] = { 0 }; |
| WORD32 child_cost_least = 0; |
| WORD32 child_satd[4] = { 0 }; |
| WORD32 x, y, size; |
| WORD32 merge_64x64 = 1; |
| UWORD8 au1_best_32x32_modes[4]; |
| WORD32 au4_best_32x32_cost[4]; |
| WORD32 parent_best_mode; |
| UWORD8 best_mode; |
| |
| WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset; |
| /* flag to control 1CU-4TU modes based on quality preset */ |
| /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */ |
| WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || |
| (i4_quality_preset == IHEVCE_QUALITY_P0); |
| |
| /* flag to control 4CU-16TU mode based on quality preset */ |
| /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/ |
| WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || |
| (i4_quality_preset == IHEVCE_QUALITY_P0); |
| |
| WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2; |
| float f_strength; |
| /* Accumalte satd */ |
| LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0; |
| WORD32 i4_ctb_acc_satd = 0; |
| |
| /* Accumalate Mode bits cost */ |
| LWORD64 i8_frame_acc_mode_bits_cost = 0; |
| |
| /* Step2 is bypassed for parent, uses children modes*/ |
| WORD32 step2_bypass = 1; |
| |
| if(1 == ps_ctxt->u1_disable_child_cu_decide) |
| step2_bypass = 0; |
| |
| ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree; |
| for(i = 0; i < 4; i++) |
| { |
| ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i; |
| } |
| |
| /* Loop for all 8x8 block in a CTB */ |
| ps_ctb_out->u4_cu_split_flags = 0x1; |
| |
| /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */ |
| for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++) |
| { |
| ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE; |
| } |
| |
| for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++) |
| { |
| ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE; |
| } |
| |
| for(i = 0; i < (MAX_CU_IN_CTB); i++) |
| { |
| ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE; |
| } |
| |
| ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE; |
| |
| /* by default 64x64 modes are set to default values DC and Planar */ |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0; |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1; |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255; |
| |
| /* by default 64x4 split is set to 1 */ |
| ps_l0_ipe_out_ctb->u1_split_flag = 1; |
| |
| /* Modulation factor calculated based on spatial variance instead of hardcoded val*/ |
| i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16; |
| |
| f_strength = ps_ctxt->f_strength; |
| |
| /* ------------------------------------------------ */ |
| /* populate the early decisions done by L1 analysis */ |
| /* ------------------------------------------------ */ |
| { |
| ihevce_ed_blk_t *ps_ed_blk_l1_curr = ps_ed_l1_ctb; |
| WORD32 ctr_8x8; |
| WORD8 *pi1_ed_buf; |
| |
| /* set all the decisions to invalid */ |
| memset( |
| &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0], |
| 0, |
| sizeof(UWORD8) * MAX_CU_IN_CTB); |
| |
| pi1_ed_buf = &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0]; |
| |
| for(ctr_8x8 = 0; ctr_8x8 < MAX_CTB_SIZE; ctr_8x8++) |
| { |
| WORD32 pos_x_8x8, pos_y_8x8; |
| |
| pos_x_8x8 = gau1_cu_pos_x[ctr_8x8]; |
| pos_y_8x8 = gau1_cu_pos_y[ctr_8x8]; |
| |
| pi1_ed_buf[pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)] = |
| ps_ed_blk_l1_curr->intra_or_inter; |
| ps_ed_blk_l1_curr++; |
| } |
| |
| for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) |
| { |
| ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[ctr_8x8] = |
| ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[ctr_8x8]; |
| |
| ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] = |
| ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[ctr_8x8]; |
| |
| /*Earlier only me sad was getting populated, now best of ipe and me is populated*/ |
| ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[ctr_8x8] = |
| ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[ctr_8x8]; |
| //ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]; |
| |
| ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[ctr_8x8] = |
| ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[ctr_8x8]; |
| //ps_ed_ctb_l1->i4_sad_cost_me_for_ref[ctr_8x8]; |
| } |
| |
| /*Init CTB level accumalated SATD and MPM bits */ |
| ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0; |
| ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0; |
| } |
| |
| /* ------------------------------------------------ */ |
| /* Loop over all the blocks in current CTB */ |
| /* ------------------------------------------------ */ |
| |
| { |
| /* 64 8x8 blocks should be encountered for the do,while loop to exit */ |
| do |
| { |
| intra32_analyse_t *ps_intra32_analyse; |
| intra16_analyse_t *ps_intra16_analyse; |
| WORD32 *pi4_intra_32_cost; |
| WORD32 *pi4_intra_16_cost; |
| WORD32 *pi4_intra_8_cost; |
| WORD32 merge_16x16_l1; |
| |
| /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */ |
| cu_pos_x = gau1_cu_pos_x[blk_cnt]; |
| cu_pos_y = gau1_cu_pos_y[blk_cnt]; |
| |
| /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */ |
| au1_best_32x32_modes[blk_cnt >> 4] = 255; |
| |
| /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ |
| /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ |
| ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4]; |
| |
| /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ |
| /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ |
| ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2]; |
| |
| /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */ |
| if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) |
| { |
| /* Reset to zero for every cu decision */ |
| merge_32x32_l1 = 0; |
| |
| child_cost_least = 0; |
| |
| /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */ |
| ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2); |
| |
| pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4]; |
| |
| /* by default 32x32 modes are set to default values DC and Planar */ |
| ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0; |
| ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1; |
| ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255; |
| |
| /* By default 32x32 split is set to 1 */ |
| ps_intra32_analyse->b1_split_flag = 1; |
| |
| ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0; |
| ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1; |
| ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255; |
| |
| /* 16x16 cost & 8x8 cost are stored in Raster scan order */ |
| /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */ |
| /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */ |
| { |
| WORD32 pos_x_8x8, pos_y_8x8; |
| |
| pos_x_8x8 = gau1_cu_pos_x[blk_cnt]; |
| pos_y_8x8 = gau1_cu_pos_y[blk_cnt]; |
| |
| pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0]; |
| |
| pi4_intra_16_cost += |
| ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1))); |
| |
| pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0]; |
| |
| pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)); |
| } |
| |
| merge_32x32_l1 = 0; |
| merge_32x32_l2 = 0; |
| i4_skip_intra_eval_32x32_l1 = 0; |
| |
| /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */ |
| merge_16x16_l1 = 0; |
| if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2)) |
| { |
| #if !ENABLE_UNIFORM_CU_SIZE_8x8 |
| merge_16x16_l1 = ps_ed_blk_l1->merge_success; |
| #else |
| merge_16x16_l1 = 0; |
| #endif |
| } |
| |
| /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */ |
| if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4)) |
| { |
| /* Check 4 flags of L1(8x8) say merge */ |
| for(i = 0; i < 4; i++) |
| { |
| merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success; |
| |
| //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval |
| i4_skip_intra_eval_32x32_l1 += |
| ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0; |
| } |
| |
| #if !ENABLE_UNIFORM_CU_SIZE_8x8 |
| /* Check 1 flag from L2(16x16) say merge */ |
| merge_32x32_l2 = ps_ed_blk_l2->merge_success; |
| #else |
| merge_32x32_l1 = 0; |
| merge_32x32_l2 = 0; |
| #endif |
| } |
| |
| #if DISABLE_L2_IPE_IN_PB_L1_IN_B |
| if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE)) |
| { |
| merge_32x32_l2 = 0; |
| ps_ed_blk_l2->merge_success = 0; |
| } |
| #endif |
| |
| ps_intra32_analyse->b1_valid_cu = 1; |
| |
| /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */ |
| /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval, |
| if no blocks to be skipped then eval entire 32x32, |
| else break the merge and go to 16x16 level eval */ |
| if((merge_32x32_l1 == 4) && merge_32x32_l2 && |
| ((i4_skip_intra_eval_32x32_l1 == 0) || |
| (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge |
| ) |
| { |
| #if IP_DBG_L1_l2 |
| /* Populate params for 32x32 block analysis */ |
| ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
| |
| ps_cu_node->ps_parent->u1_cu_size = 32; |
| ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode; |
| /* CU size 32x32 and fill the final cu params */ |
| |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| /* Increment pointers */ |
| ps_ed_blk_l1 += 16; |
| blk_cnt += 16; |
| ps_row_cu++; |
| merge_64x64 &= 1; |
| #else |
| |
| /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/ |
| if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE)) |
| { |
| WORD32 i4_local_ctr1, i4_local_ctr2; |
| |
| ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
| |
| ps_cu_node->ps_parent->u1_cu_size = 32; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->best_mode = |
| INTRA_DC; //ps_ed_blk_l2->best_merge_mode; |
| /* CU size 32x32 and fill the final cu params */ |
| |
| /* fill in the first modes as invalid */ |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_1tu[1] = |
| INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 |
| ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; |
| |
| ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; |
| |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; |
| //ps_row_cu->u1_num_intra_rdopt_cands = 0; |
| |
| ps_intra32_analyse->b1_valid_cu = 0; |
| ps_intra32_analyse->b1_split_flag = 0; |
| ps_intra32_analyse->b1_merge_flag = 0; |
| /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu, |
| 255, |
| NUM_BEST_MODES); |
| memset (&ps_intra32_analyse->au1_best_modes_16x16_tu, |
| 255, |
| NUM_BEST_MODES);*/ |
| //set only first mode since if it's 255. it wont go ahead |
| ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255; |
| ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255; |
| ps_intra32_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; |
| |
| *pi4_intra_32_cost = MAX_INTRA_COST_IPE; |
| |
| /*since ME will start evaluating from bottom up, set the lower |
| cu size data invalid */ |
| for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++) |
| { |
| WORD32 *pi4_intra_8_cost_curr16; |
| |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .au1_best_modes_16x16_tu[0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .au1_best_modes_8x8_tu[0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .i4_best_intra_cost = MAX_INTRA_COST_IPE; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0; |
| |
| pi4_intra_16_cost |
| [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * |
| (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE; |
| |
| pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1); |
| pi4_intra_8_cost_curr16 += |
| ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW; |
| |
| for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++) |
| { |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .au1_4x4_best_modes[0][0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .au1_4x4_best_modes[1][0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .au1_4x4_best_modes[2][0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .au1_4x4_best_modes[3][0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .au1_best_modes_8x8_tu[0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .au1_best_modes_4x4_tu[0] = 255; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .i4_best_intra_cost = MAX_INTRA_COST_IPE; |
| ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
| .as_intra8_analyse[i4_local_ctr2] |
| .b1_valid_cu = 0; |
| |
| pi4_intra_8_cost_curr16 |
| [(i4_local_ctr2 & 1) + |
| (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] = |
| MAX_INTRA_COST_IPE; |
| } |
| } |
| |
| /* set neighbours even if intra is not evaluated, since source is always available. */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| ps_cu_node->ps_parent->u2_x0 << 1, |
| ps_cu_node->ps_parent->u2_y0 << 1, |
| (ps_cu_node->ps_parent->u1_cu_size >> 2), |
| 1); |
| |
| /* cost accumulation of best cu size candidate */ |
| /*i8_frame_acc_satd_cost += parent_cost;*/ |
| |
| /* Mode bits cost accumalation for best cu size and cu mode */ |
| /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/ |
| |
| /*satd/mod_qp accumulation of best cu */ |
| /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/ |
| |
| /* Increment pointers */ |
| ps_ed_blk_l1 += 16; |
| blk_cnt += 16; |
| //ps_row_cu++; |
| merge_64x64 = 0; |
| |
| /* increment for stat purpose only. Increment is valid only on single thread */ |
| ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4; |
| } |
| else |
| { |
| /* Revaluation of 4 16x16 blocks at 8x8 prediction level */ |
| //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
| |
| if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && |
| (ps_ctxt->i4_slice_type == PSLICE)) |
| { |
| ps_ctxt->u1_disable_child_cu_decide = 1; |
| step2_bypass = 0; |
| } |
| |
| /* Based on the flag, Child modes decision can be disabled*/ |
| if(0 == ps_ctxt->u1_disable_child_cu_decide) |
| { |
| for(j = 0; j < 4; j++) |
| { |
| ps_cu_node->ps_sub_cu[j]->u2_x0 = |
| gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */ |
| ps_cu_node->ps_sub_cu[j]->u2_y0 = |
| gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */ |
| ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16; |
| |
| { |
| WORD32 best_ang_mode = |
| (ps_ed_blk_l1 + (j * 4))->best_merge_mode; |
| |
| if(best_ang_mode < 2) |
| best_ang_mode = 26; |
| |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_sub_cu[j], |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| best_ang_mode, |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
| !step2_bypass, |
| 1); |
| |
| if(i4_enable_4cu_16tu) |
| { |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_sub_cu[j], |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| best_ang_mode, |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
| !step2_bypass, |
| 0); |
| } |
| else |
| { |
| /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
| memcpy( |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
| NUM_BEST_MODES); |
| |
| /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
| memcpy( |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
| NUM_BEST_MODES * sizeof(WORD32)); |
| } |
| |
| child_cost[j] = |
| MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
| ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); |
| |
| /* Child cost is sum of costs at 16x16 level */ |
| child_cost_least += child_cost[j]; |
| |
| /* Select the best mode to be populated as top and left nbr depending on the |
| 4tu and 1tu cost */ |
| if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > |
| ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) |
| { |
| ps_cu_node->ps_sub_cu[j]->best_mode = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; |
| } |
| else |
| { |
| ps_cu_node->ps_sub_cu[j]->best_mode = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; |
| } |
| |
| { /* Update the CTB nodes only for MAX - 1 CU nodes */ |
| WORD32 xA, yA, row, col; |
| xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; |
| yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; |
| size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; |
| for(row = yA; row < (yA + size); row++) |
| { |
| for(col = xA; col < (xA + size); col++) |
| { |
| ps_ctxt->au1_ctb_mode_map[row][col] = |
| ps_cu_node->ps_sub_cu[j]->best_mode; |
| } |
| } |
| } |
| } |
| |
| /*Child SATD cost*/ |
| child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; |
| |
| /* store the child 16x16 costs */ |
| pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] = |
| child_cost[j]; |
| |
| /* set the CU valid flag */ |
| ps_intra16_analyse[j].b1_valid_cu = 1; |
| |
| /* All 16x16 merge is valid, if Cu 32x32 is chosen */ |
| /* To be reset, if CU 64x64 is chosen */ |
| ps_intra16_analyse[j].b1_merge_flag = 1; |
| |
| /* storing the modes to intra 16 analyse */ |
| /* store the best 16x16 modes 8x8 tu */ |
| memcpy( |
| &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
| |
| /* store the best 16x16 modes 16x16 tu */ |
| memcpy( |
| &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; |
| |
| /* divide the 16x16 costs (pro rating) to 4 8x8 costs */ |
| /* store the same 16x16 modes as 4 8x8 child modes */ |
| { |
| WORD32 idx_8x8; |
| WORD32 *pi4_intra_8_cost_curr16; |
| intra8_analyse_t *ps_intra8_analyse; |
| |
| pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1); |
| pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW; |
| |
| for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) |
| { |
| pi4_intra_8_cost_curr16 |
| [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] = |
| (child_cost[j] + 3) >> 2; |
| |
| ps_intra8_analyse = |
| &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; |
| |
| ps_intra8_analyse->b1_enable_nxn = 0; |
| ps_intra8_analyse->b1_valid_cu = 1; |
| |
| /* store the best 8x8 modes 8x8 tu */ |
| memcpy( |
| &ps_intra8_analyse->au1_best_modes_8x8_tu[0], |
| &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES + 1)); |
| |
| /* store the best 8x8 modes 4x4 tu */ |
| memcpy( |
| &ps_intra8_analyse->au1_best_modes_4x4_tu[0], |
| &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES + 1)); |
| |
| /* NXN modes not evaluated hence set to 255 */ |
| memset( |
| &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
| 255, |
| sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
| } |
| } |
| } |
| |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, |
| ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, |
| (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), |
| 0); |
| } |
| #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 |
| else |
| { |
| for(j = 0; j < 4; j++) |
| { |
| WORD32 idx_8x8; |
| intra8_analyse_t *ps_intra8_analyse; |
| ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255; |
| ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255; |
| |
| ps_intra16_analyse[j].b1_valid_cu = 0; |
| |
| for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) |
| { |
| ps_intra8_analyse = |
| &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; |
| |
| ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; |
| ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; |
| |
| ps_intra8_analyse->b1_enable_nxn = 0; |
| ps_intra8_analyse->b1_valid_cu = 0; |
| |
| /* NXN modes not evaluated hence set to 255 */ |
| memset( |
| &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
| 255, |
| sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
| } |
| } |
| |
| child_cost_least = MAX_INTRA_COST_IPE; |
| } |
| #endif |
| |
| /* Populate params for 32x32 block analysis */ |
| |
| ps_cu_node->ps_parent->u1_cu_size = 32; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| |
| /* Revaluation for 32x32 parent block at 16x16 prediction level */ |
| //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
| |
| { |
| /* Eval for TUSize = CuSize */ |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| 26, |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| step2_bypass, |
| 1); |
| |
| if(i4_enable_1cu_4tu) |
| { |
| /* Eval for TUSize = CuSize/2 */ |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| 26, |
| &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| step2_bypass, |
| 0); |
| } |
| else |
| { |
| /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
| memcpy( |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| NUM_BEST_MODES); |
| |
| /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
| memcpy( |
| &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| NUM_BEST_MODES * sizeof(WORD32)); |
| } |
| } |
| |
| ps_ctxt->u1_disable_child_cu_decide = 0; |
| step2_bypass = 1; |
| |
| /* Update parent cost */ |
| parent_cost = |
| MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]); |
| |
| /* Select the best mode to be populated as top and left nbr depending on the |
| 4tu and 1tu cost */ |
| if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]) |
| { |
| ps_cu_node->ps_parent->best_mode = |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
| } |
| else |
| { |
| ps_cu_node->ps_parent->best_mode = |
| ps_cu_node->ps_parent->au1_best_mode_4tu[0]; |
| } |
| |
| /* store the 32x32 cost */ |
| *pi4_intra_32_cost = parent_cost; |
| |
| /* set the CU valid flag */ |
| ps_intra32_analyse->b1_valid_cu = 1; |
| |
| ps_intra32_analyse->b1_merge_flag = 1; |
| |
| /* storing the modes to intra 32 analyse */ |
| { |
| /* store the best 32x32 modes 16x16 tu */ |
| memcpy( |
| &ps_intra32_analyse->au1_best_modes_16x16_tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; |
| |
| /* store the best 32x32 modes 32x32 tu */ |
| memcpy( |
| &ps_intra32_analyse->au1_best_modes_32x32_tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255; |
| } |
| parent_best_mode = ps_cu_node->ps_parent->best_mode; |
| if((parent_cost <= |
| child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> |
| LAMBDA_Q_SHIFT))) //|| identical_modes) |
| { |
| WORD32 i4_q_scale_q3_mod; |
| UWORD8 u1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| |
| /* CU size 32x32 and fill the final cu params */ |
| |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| if((IHEVCE_QUALITY_P3 > i4_quality_preset)) |
| { |
| for(i = 0; i < 4; i++) |
| { |
| intra8_analyse_t *ps_intra8_analyse; |
| ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; |
| for(j = 0; j < 4; j++) |
| { |
| /* Populate best 3 nxn modes */ |
| ps_intra8_analyse->au1_4x4_best_modes[j][0] = |
| ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0]; |
| ps_intra8_analyse->au1_4x4_best_modes[j][1] = |
| ps_cu_node->ps_sub_cu[i] |
| ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; |
| ps_intra8_analyse->au1_4x4_best_modes[j][2] = |
| ps_cu_node->ps_sub_cu[i] |
| ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; |
| ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; |
| } |
| } |
| } |
| /* store the 32x32 non split flag */ |
| ps_intra32_analyse->b1_split_flag = 0; |
| ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0; |
| ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0; |
| ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0; |
| ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0; |
| |
| au1_best_32x32_modes[blk_cnt >> 4] = |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
| |
| au4_best_32x32_cost[blk_cnt >> 4] = |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]; |
| /*As 32*32 has won, pick L2 8x8 qp which maps |
| to L0 32x32 Qp*/ |
| ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); |
| ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); |
| u1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], |
| ps_ctxt->ld_curr_frame_16x16_log_avg[0], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| /* cost accumulation of best cu size candidate */ |
| i8_frame_acc_satd_cost += parent_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; |
| |
| /* Mode bits cost accumalation for best cu size and cu mode */ |
| i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; |
| |
| /*satd/mod_qp accumulation of best cu */ |
| i8_frame_acc_satd_by_modqp_q10 += |
| ((LWORD64)ps_cu_node->ps_parent->best_satd |
| << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| |
| /* Increment pointers */ |
| ps_ed_blk_l1 += 16; |
| blk_cnt += 16; |
| //ps_row_cu++; |
| merge_64x64 &= 1; |
| } |
| else |
| { |
| /* store the 32x32 split flag */ |
| ps_intra32_analyse->b1_split_flag = 1; |
| |
| /* CU size 16x16 and fill the final cu params for all 4 blocks */ |
| for(j = 0; j < 4; j++) |
| { |
| WORD32 i4_q_scale_q3_mod; |
| UWORD8 u1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| |
| /* Set CU split flag */ |
| ASSERT(blk_cnt % 4 == 0); |
| |
| ihevce_update_cand_list( |
| ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); |
| |
| /* store the 16x16 non split flag */ |
| ps_intra16_analyse[j].b1_split_flag = 0; |
| |
| ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); |
| /*As 16*16 has won, pick L1 8x8 qp which maps |
| to L0 16x16 Qp*/ |
| u1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| /*accum satd/qp for all child block*/ |
| i8_frame_acc_satd_by_modqp_q10 += |
| ((LWORD64)child_satd[j] |
| << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| |
| /* Accumalate mode bits for all child blocks */ |
| i8_frame_acc_mode_bits_cost += |
| ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd += child_satd[j]; |
| |
| /* Increment pointers */ |
| //ps_row_cu++; |
| ps_ed_blk_l1 += 4; |
| blk_cnt += 4; |
| } |
| |
| /* cost accumulation of best cu size candidate */ |
| i8_frame_acc_satd_cost += child_cost_least; |
| |
| /* 64x64 merge is not possible */ |
| merge_64x64 = 0; |
| } |
| |
| //ps_ed_blk_l2 += 4; |
| |
| } //end of EIID's else |
| #endif |
| } |
| /* If Merge success for L1 max CU size 16x16 is chosen */ |
| else if(merge_16x16_l1) |
| { |
| #if IP_DBG_L1_l2 |
| ps_cu_node->ps_parent->u1_cu_size = 16; |
| ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode; |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| blk_cnt += 4; |
| ps_ed_blk_l1 += 4; |
| ps_row_cu++; |
| merge_64x64 = 0; |
| #else |
| |
| /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/ |
| /* enable this only in B pictures */ |
| if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) |
| { |
| WORD32 i4_q_scale_q3_mod, i4_local_ctr; |
| WORD8 i1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| /* make cost infinity. */ |
| /* make modes invalid */ |
| /* update loop variables */ |
| /* set other output variales */ |
| /* dont set neighbour flag so that next blocks wont access this cu */ |
| /* what happens to ctb_mode_map?? */ |
| |
| ps_cu_node->ps_parent->u1_cu_size = 16; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->best_mode = |
| INTRA_DC; //ps_ed_blk_l1->best_merge_mode; |
| |
| /* fill in the first modes as invalid */ |
| |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_1tu[1] = |
| INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 |
| ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; |
| |
| ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; |
| |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; |
| //ps_row_cu->u1_num_intra_rdopt_cands = 0; |
| |
| ps_intra32_analyse->b1_split_flag = 1; |
| ps_intra32_analyse->b1_merge_flag = 0; |
| |
| ps_intra16_analyse->b1_valid_cu = 0; |
| ps_intra16_analyse->b1_split_flag = 0; |
| ps_intra16_analyse->b1_merge_flag = 1; |
| //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu, |
| // 255, |
| // NUM_BEST_MODES); |
| //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu, |
| // 255, |
| // NUM_BEST_MODES); |
| //set only first mode since if it's 255. it wont go ahead |
| ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; |
| ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; |
| ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; |
| *pi4_intra_16_cost = MAX_INTRA_COST_IPE; |
| |
| /*since ME will start evaluating from bottom up, set the lower |
| cu size data invalid */ |
| for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++) |
| { |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
| .au1_4x4_best_modes[0][0] = 255; |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
| .au1_4x4_best_modes[1][0] = 255; |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
| .au1_4x4_best_modes[2][0] = 255; |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
| .au1_4x4_best_modes[3][0] = 255; |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
| .au1_best_modes_8x8_tu[0] = 255; |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
| .au1_best_modes_4x4_tu[0] = 255; |
| ps_intra16_analyse->as_intra8_analyse[i4_local_ctr].i4_best_intra_cost = |
| MAX_INTRA_COST_IPE; |
| |
| pi4_intra_8_cost |
| [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] = |
| MAX_INTRA_COST_IPE; |
| } |
| |
| /* set neighbours even if intra is not evaluated, since source is always available. */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| ps_cu_node->ps_parent->u2_x0 << 1, |
| ps_cu_node->ps_parent->u2_y0 << 1, |
| (ps_cu_node->ps_parent->u1_cu_size >> 2), |
| 1); |
| |
| //what happens to RC variables?? |
| /* run only constant Qp */ |
| ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); |
| i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| /* cost accumalation of best cu size candiate */ |
| i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation |
| |
| /*satd/mod_qp accumulation of best cu */ |
| i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation |
| //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod; |
| |
| /* Accumalate mode bits for all child blocks */ |
| i8_frame_acc_mode_bits_cost += |
| 0; //ps_cu_node->ps_parent->u2_mode_bits_cost; |
| //incorrect accumulation |
| |
| blk_cnt += 4; |
| ps_ed_blk_l1 += 4; |
| //ps_row_cu++; |
| merge_64x64 = 0; |
| |
| /* increment for stat purpose only. Increment is valid only on single thread */ |
| ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1; |
| } |
| else |
| { |
| /* 64x64 merge is not possible */ |
| merge_64x64 = 0; |
| |
| /* set the 32x32 split flag to 1 */ |
| ps_intra32_analyse->b1_split_flag = 1; |
| |
| ps_intra32_analyse->b1_merge_flag = 0; |
| |
| ps_intra16_analyse->b1_merge_flag = 1; |
| |
| if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && |
| (ps_ctxt->i4_slice_type == PSLICE)) |
| { |
| ps_ctxt->u1_disable_child_cu_decide = 1; |
| step2_bypass = 0; |
| } |
| //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
| /* Based on the flag, Child modes decision can be disabled*/ |
| if(0 == ps_ctxt->u1_disable_child_cu_decide) |
| { |
| for(j = 0; j < 4; j++) |
| { |
| intra8_analyse_t *ps_intra8_analyse; |
| WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode; |
| |
| if(best_ang_mode < 2) |
| best_ang_mode = 26; |
| |
| //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE; |
| //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode; |
| |
| ps_cu_node->ps_sub_cu[j]->u2_x0 = |
| gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */ |
| ps_cu_node->ps_sub_cu[j]->u2_y0 = |
| gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */ |
| ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8; |
| |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_sub_cu[j], |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| best_ang_mode, |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
| !step2_bypass, |
| 1); |
| |
| if(i4_enable_4cu_16tu) |
| { |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_sub_cu[j], |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| best_ang_mode, |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
| !step2_bypass, |
| 0); |
| } |
| else |
| { |
| /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
| memcpy( |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
| NUM_BEST_MODES); |
| |
| /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
| memcpy( |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
| NUM_BEST_MODES * sizeof(WORD32)); |
| } |
| |
| child_cost[j] = |
| MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
| ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); |
| |
| child_cost_least += child_cost[j]; |
| |
| /* Select the best mode to be populated as top and left nbr depending on the |
| 4tu and 1tu cost */ |
| if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > |
| ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) |
| { |
| ps_cu_node->ps_sub_cu[j]->best_mode = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; |
| } |
| else |
| { |
| ps_cu_node->ps_sub_cu[j]->best_mode = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; |
| } |
| { /* Update the CTB nodes only for MAX - 1 CU nodes */ |
| WORD32 xA, yA, row, col; |
| xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; |
| yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; |
| size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; |
| for(row = yA; row < (yA + size); row++) |
| { |
| for(col = xA; col < (xA + size); col++) |
| { |
| ps_ctxt->au1_ctb_mode_map[row][col] = |
| ps_cu_node->ps_sub_cu[j]->best_mode; |
| } |
| } |
| } |
| |
| /*collect individual child satd for final SATD/qp accum*/ |
| child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; |
| |
| ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; |
| |
| /* store the child 8x8 costs */ |
| pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] = |
| child_cost[j]; |
| |
| /* set the CU valid flag */ |
| ps_intra8_analyse->b1_valid_cu = 1; |
| ps_intra8_analyse->b1_enable_nxn = 0; |
| |
| /* storing the modes to intra8 analyse */ |
| |
| /* store the best 8x8 modes 8x8 tu */ |
| memcpy( |
| &ps_intra8_analyse->au1_best_modes_8x8_tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
| |
| /* store the best 8x8 modes 4x4 tu */ |
| memcpy( |
| &ps_intra8_analyse->au1_best_modes_4x4_tu[0], |
| &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; |
| |
| /* NXN modes not evaluated hence set to 255 */ |
| memset( |
| &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
| 255, |
| sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
| } |
| |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, |
| ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, |
| (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), |
| 0); |
| } |
| #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 |
| else |
| { |
| for(j = 0; j < 4; j++) |
| { |
| intra8_analyse_t *ps_intra8_analyse; |
| ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; |
| ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; |
| ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; |
| /* NXN modes not evaluated hence set to 255 */ |
| memset( |
| &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
| 255, |
| sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
| |
| ps_intra8_analyse->b1_valid_cu = 0; |
| ps_intra8_analyse->b1_enable_nxn = 0; |
| } |
| child_cost_least = MAX_INTRA_COST_IPE; |
| } |
| #endif |
| //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; |
| //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
| |
| ps_cu_node->ps_parent->u1_cu_size = 16; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| |
| //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
| |
| /* Eval for TUSize = CuSize */ |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| 26, |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| step2_bypass, |
| 1); |
| |
| if(i4_enable_1cu_4tu) |
| { |
| /* Eval for TUSize = CuSize/2 */ |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| 26, |
| &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| step2_bypass, |
| 0); |
| } |
| else |
| { |
| /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
| memcpy( |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| NUM_BEST_MODES); |
| |
| /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
| memcpy( |
| &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| NUM_BEST_MODES * sizeof(WORD32)); |
| } |
| |
| ps_ctxt->u1_disable_child_cu_decide = 0; |
| step2_bypass = 1; |
| |
| /* Update parent cost */ |
| parent_cost = |
| MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]); |
| |
| /* Select the best mode to be populated as top and left nbr depending on the |
| 4tu and 1tu cost */ |
| if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]) |
| { |
| ps_cu_node->ps_parent->best_mode = |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
| } |
| else |
| { |
| ps_cu_node->ps_parent->best_mode = |
| ps_cu_node->ps_parent->au1_best_mode_4tu[0]; |
| } |
| |
| /* store the 16x16 cost */ |
| *pi4_intra_16_cost = parent_cost; |
| |
| /* accumulate the 32x32 cost */ |
| if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) |
| { |
| *pi4_intra_32_cost = parent_cost; |
| } |
| else |
| { |
| *pi4_intra_32_cost += parent_cost; |
| } |
| |
| /* set the CU valid flag */ |
| ps_intra16_analyse->b1_valid_cu = 1; |
| |
| /* storing the modes to intra 16 analyse */ |
| { |
| /* store the best 16x16 modes 16x16 tu */ |
| memcpy( |
| &ps_intra16_analyse->au1_best_modes_16x16_tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| sizeof(UWORD8) * NUM_BEST_MODES); |
| ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; |
| |
| /* store the best 16x16 modes 8x8 tu */ |
| memcpy( |
| &ps_intra16_analyse->au1_best_modes_8x8_tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| sizeof(UWORD8) * NUM_BEST_MODES); |
| ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
| } |
| |
| parent_best_mode = ps_cu_node->ps_parent->best_mode; |
| if(parent_cost <= |
| child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> |
| LAMBDA_Q_SHIFT)) //|| identical_modes) |
| { |
| WORD32 i4_q_scale_q3_mod; |
| WORD8 i1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| //choose parent CU |
| |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| /* set the 16x16 non split flag */ |
| ps_intra16_analyse->b1_split_flag = 0; |
| |
| /*As 16*16 has won, pick L1 8x8 qp which maps |
| to L0 16x16 Qp*/ |
| ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); |
| ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); |
| i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| /* cost accumalation of best cu size candiate */ |
| i8_frame_acc_satd_cost += parent_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; |
| |
| /*satd/mod_qp accumulation of best cu */ |
| i8_frame_acc_satd_by_modqp_q10 += |
| ((LWORD64)ps_cu_node->ps_parent->best_satd |
| << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| |
| /* Accumalate mode bits for all child blocks */ |
| i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; |
| |
| blk_cnt += 4; |
| ps_ed_blk_l1 += 4; |
| //ps_row_cu++; |
| } |
| else |
| { |
| //choose child CU |
| WORD8 i1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| WORD32 i4_q_scale_q3_mod; |
| |
| ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2); |
| i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| /* set the 16x16 split flag */ |
| ps_intra16_analyse->b1_split_flag = 1; |
| |
| for(j = 0; j < 4; j++) |
| { |
| ihevce_update_cand_list( |
| ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); |
| |
| if((IHEVCE_QUALITY_P3 > i4_quality_preset)) |
| { |
| WORD32 k; |
| intra8_analyse_t *ps_intra8_analyse; |
| ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; |
| |
| for(k = 0; k < 4; k++) |
| { |
| /* Populate best 3 nxn modes */ |
| ps_intra8_analyse->au1_4x4_best_modes[k][0] = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; |
| ps_intra8_analyse->au1_4x4_best_modes[k][1] = |
| ps_cu_node->ps_sub_cu[j] |
| ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; |
| ps_intra8_analyse->au1_4x4_best_modes[k][2] = |
| ps_cu_node->ps_sub_cu[j] |
| ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; |
| ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255; |
| } |
| } |
| /*accum satd/qp for all child block*/ |
| i8_frame_acc_satd_by_modqp_q10 += |
| ((LWORD64)child_satd[j] |
| << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| |
| /* Accumalate mode bits for all child blocks */ |
| i8_frame_acc_mode_bits_cost += |
| ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd += child_satd[j]; |
| |
| blk_cnt += 1; |
| ps_ed_blk_l1 += 1; |
| //ps_row_cu++; |
| } |
| |
| /* cost accumalation of best cu size candiate */ |
| i8_frame_acc_satd_cost += child_cost_least; |
| } |
| |
| } //else of EIID |
| #endif |
| } // if(merge_16x16_l1) |
| /* MAX CU SIZE 8x8 */ |
| else |
| { |
| #if IP_DBG_L1_l2 |
| for(i = 0; i < 4; i++) |
| { |
| ps_cu_node->ps_parent->u1_cu_size = 8; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; |
| |
| ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| blk_cnt++; |
| ps_ed_blk_l1++; |
| ps_row_cu++; |
| merge_64x64 = 0; |
| } |
| #else |
| |
| /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */ |
| if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) |
| { |
| WORD32 i4_q_scale_q3_mod; |
| WORD8 i1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| |
| merge_64x64 = 0; |
| |
| ps_intra32_analyse->b1_merge_flag = 0; |
| |
| ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; |
| ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255; |
| ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; |
| |
| ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; |
| ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255; |
| ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; |
| ps_intra16_analyse->b1_split_flag = 1; |
| ps_intra16_analyse->b1_valid_cu = 0; |
| ps_intra16_analyse->b1_merge_flag = 0; |
| |
| ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; |
| |
| for(i = 0; i < 4; i++) |
| { |
| intra8_analyse_t *ps_intra8_analyse; |
| WORD32 ctr_sub_cu; |
| |
| cu_pos_x = gau1_cu_pos_x[blk_cnt]; |
| cu_pos_y = gau1_cu_pos_y[blk_cnt]; |
| |
| if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) |
| { |
| ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; |
| |
| ps_intra8_analyse->b1_valid_cu = 0; |
| ps_intra8_analyse->b1_enable_nxn = 0; |
| ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255; |
| ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255; |
| ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255; |
| ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255; |
| ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; |
| ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; |
| ps_intra8_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE; |
| |
| ps_cu_node->ps_parent->u1_cu_size = 8; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->best_mode = |
| INTRA_DC; //ps_ed_blk_l1->best_mode; |
| |
| /* fill in the first modes as invalid */ |
| |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_1tu[1] = |
| INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 |
| ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; |
| |
| ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; |
| ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; |
| |
| ihevce_update_cand_list( |
| ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; |
| //ps_row_cu->u1_num_intra_rdopt_cands = 0; |
| |
| for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++) |
| { |
| ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] = |
| INTRA_DC; |
| ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] = |
| INTRA_DC; |
| ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] = |
| MAX_INTRA_COST_IPE; |
| |
| ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] = |
| MAX_INTRA_COST_IPE; |
| ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost = |
| MAX_INTRA_COST_IPE; |
| } |
| |
| pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = |
| MAX_INTRA_COST_IPE; |
| |
| ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); |
| i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| /* set neighbours even if intra is not evaluated, since source is always available. */ |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| ps_cu_node->ps_parent->u2_x0 << 1, |
| ps_cu_node->ps_parent->u2_y0 << 1, |
| (ps_cu_node->ps_parent->u1_cu_size >> 2), |
| 1); |
| |
| //ps_row_cu++; |
| } |
| blk_cnt++; |
| ps_ed_blk_l1++; |
| } |
| } |
| else |
| { |
| //cu_intra_cand_t *ps_cu_intra_cand; |
| WORD8 i1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| WORD32 i4_q_scale_q3_mod; |
| |
| ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); |
| i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| /* 64x64 merge is not possible */ |
| merge_64x64 = 0; |
| |
| ps_intra32_analyse->b1_merge_flag = 0; |
| |
| ps_intra16_analyse->b1_merge_flag = 0; |
| |
| /* by default 16x16 modes are set to default values DC and Planar */ |
| ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0; |
| ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1; |
| ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; |
| |
| ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0; |
| ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1; |
| ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; |
| ps_intra16_analyse->b1_split_flag = 1; |
| ps_intra16_analyse->b1_valid_cu = 1; |
| |
| for(i = 0; i < 4; i++) |
| { |
| intra8_analyse_t *ps_intra8_analyse; |
| cu_pos_x = gau1_cu_pos_x[blk_cnt]; |
| cu_pos_y = gau1_cu_pos_y[blk_cnt]; |
| if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) |
| { |
| //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand; |
| //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
| |
| //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; |
| |
| child_cost_least = 0; |
| |
| ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; |
| ps_cu_node->ps_parent->u1_cu_size = 8; |
| ps_cu_node->ps_parent->u2_x0 = |
| gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = |
| gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
| |
| //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
| |
| /*EARLY DECISION 8x8 block */ |
| ihevce_pu_calc_8x8_blk( |
| ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector); |
| for(j = 0; j < 4; j++) |
| { |
| child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost; |
| child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; |
| } |
| |
| /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */ |
| if(0 == ps_ctxt->u1_disable_child_cu_decide) |
| { |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| ps_cu_node->ps_parent->u2_x0 << 1, |
| ps_cu_node->ps_parent->u2_y0 << 1, |
| (ps_cu_node->ps_parent->u1_cu_size >> 2), |
| 0); |
| |
| //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
| |
| /* Eval for TUSize = CuSize */ |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| 26, |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| step2_bypass, |
| 1); |
| |
| if(i4_enable_1cu_4tu) |
| { |
| /* Eval for TUSize = CuSize/2 */ |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| 26, |
| &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| step2_bypass, |
| 0); |
| } |
| else |
| { |
| /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
| memcpy( |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| NUM_BEST_MODES); |
| |
| /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
| memcpy( |
| &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| NUM_BEST_MODES * sizeof(WORD32)); |
| } |
| |
| /* Update parent cost */ |
| parent_cost = |
| MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]); |
| |
| /* Select the best mode to be populated as top and left nbr depending on the |
| 4tu and 1tu cost */ |
| if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > |
| ps_cu_node->ps_parent->au4_best_cost_1tu[0]) |
| { |
| ps_cu_node->ps_parent->best_mode = |
| ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
| } |
| else |
| { |
| ps_cu_node->ps_parent->best_mode = |
| ps_cu_node->ps_parent->au1_best_mode_4tu[0]; |
| } |
| } |
| |
| /* set the CU valid flag */ |
| ps_intra8_analyse->b1_valid_cu = 1; |
| ps_intra8_analyse->b1_enable_nxn = 0; |
| |
| /* storing the modes to intra 8 analyse */ |
| |
| /* store the best 8x8 modes 8x8 tu */ |
| memcpy( |
| &ps_intra8_analyse->au1_best_modes_8x8_tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
| |
| /* store the best 8x8 modes 4x4 tu */ |
| memcpy( |
| &ps_intra8_analyse->au1_best_modes_4x4_tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
| sizeof(UWORD8) * (NUM_BEST_MODES)); |
| ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; |
| |
| /*As 8*8 has won, pick L1 4x4 qp which is equal to |
| L1 8x8 Qp*/ |
| //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp; |
| //ps_row_cu->i4_act_factor[0][1] = i4_act_factor; |
| |
| parent_best_mode = ps_cu_node->ps_parent->best_mode; |
| if(parent_cost <= |
| child_cost_least + |
| (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT)) |
| { |
| /*CU = 4TU */ |
| ihevce_update_cand_list( |
| ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
| |
| /* store the child 8x8 costs */ |
| pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = |
| parent_cost; |
| |
| /* cost accumalation of best cu size candiate */ |
| i8_frame_acc_satd_cost += parent_cost; |
| |
| /*satd/mod_qp accumulation of best cu */ |
| i8_frame_acc_satd_by_modqp_q10 += |
| ((LWORD64)ps_cu_node->ps_parent->best_satd |
| << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| |
| /* Accumalate mode bits for all child blocks */ |
| i8_frame_acc_mode_bits_cost += |
| ps_cu_node->ps_parent->u2_mode_bits_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; |
| |
| /* accumulate the 16x16 cost*/ |
| if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) |
| { |
| *pi4_intra_16_cost = parent_cost; |
| } |
| else |
| { |
| *pi4_intra_16_cost += parent_cost; |
| } |
| |
| /* accumulate the 32x32 cost*/ |
| if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) |
| { |
| *pi4_intra_32_cost = parent_cost; |
| } |
| else |
| { |
| *pi4_intra_32_cost += parent_cost; |
| } |
| } |
| else |
| { |
| /*CU = 4PU*/ |
| //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0; |
| //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0; |
| //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size; |
| |
| /* store the child 8x8 costs woth 4x4 pu summed cost */ |
| pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = |
| (child_cost_least); |
| |
| /* accumulate the 16x16 cost*/ |
| if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) |
| { |
| *pi4_intra_16_cost = child_cost_least; |
| } |
| else |
| { |
| *pi4_intra_16_cost += child_cost_least; |
| } |
| |
| /* cost accumalation of best cu size candiate */ |
| i8_frame_acc_satd_cost += child_cost_least; |
| |
| for(j = 0; j < 4; j++) |
| { |
| /*satd/qp accumualtion*/ |
| i8_frame_acc_satd_by_modqp_q10 += |
| ((LWORD64)child_satd[j] |
| << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| |
| /* Accumalate mode bits for all child blocks */ |
| i8_frame_acc_mode_bits_cost += |
| ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd += child_satd[j]; |
| } |
| |
| /* accumulate the 32x32 cost*/ |
| if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) |
| { |
| *pi4_intra_32_cost = child_cost_least; |
| } |
| else |
| { |
| *pi4_intra_32_cost += child_cost_least; |
| } |
| |
| ps_intra8_analyse->b1_enable_nxn = 1; |
| |
| /* Insert the best 8x8 modes unconditionally */ |
| |
| x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; |
| y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
| size = ps_cu_node->u1_cu_size >> 2; |
| |
| ps_ctxt->au1_ctb_mode_map[y][x] = |
| ps_cu_node->ps_sub_cu[0]->best_mode; |
| ps_ctxt->au1_ctb_mode_map[y][x + 1] = |
| ps_cu_node->ps_sub_cu[1]->best_mode; |
| ps_ctxt->au1_ctb_mode_map[y + 1][x] = |
| ps_cu_node->ps_sub_cu[2]->best_mode; |
| ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] = |
| ps_cu_node->ps_sub_cu[3]->best_mode; |
| } |
| /* NXN mode population */ |
| for(j = 0; j < 4; j++) |
| { |
| cand_mode_list[0] = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; |
| cand_mode_list[1] = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1]; |
| cand_mode_list[2] = |
| ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2]; |
| |
| if(1) |
| { |
| /* Populate best 3 nxn modes */ |
| ps_intra8_analyse->au1_4x4_best_modes[j][0] = |
| cand_mode_list[0]; |
| ps_intra8_analyse->au1_4x4_best_modes[j][1] = |
| cand_mode_list[1]; //(ps_ed + 1)->best_mode; |
| ps_intra8_analyse->au1_4x4_best_modes[j][2] = |
| cand_mode_list[2]; //(ps_ed + 2)->best_mode; |
| ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; |
| |
| //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4); |
| } |
| /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */ |
| |
| else /* IHEVCE_QUALITY_P0 == i4_quality_preset */ |
| { |
| /* To indicate to enc loop that NXN is enabled in HIGH QUALITY fior CU 8x8*/ |
| ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0; |
| } |
| |
| ps_intra8_analyse |
| ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255; |
| } |
| |
| //ps_row_cu++; |
| } |
| else |
| { |
| /* For Incomplete CTB, 16x16 is not valid */ |
| ps_intra16_analyse->b1_valid_cu = 0; |
| } |
| blk_cnt++; |
| ps_ed_blk_l1++; |
| } |
| //ps_ed_blk_l2 ++; |
| } //else of EIID |
| #endif |
| } |
| } |
| else |
| { |
| /* For incomplete CTB, init valid CU to 0 */ |
| ps_ed_blk_l1++; |
| ps_intra32_analyse->b1_valid_cu = 0; |
| ps_intra16_analyse[0].b1_valid_cu = 0; |
| blk_cnt++; |
| merge_64x64 = 0; |
| } |
| } while(blk_cnt != MAX_CTB_SIZE); |
| /* if 64x64 merge is possible then check for 32x32 having same best modes */ |
| if(1 == merge_64x64) |
| { |
| WORD32 act_mode = au1_best_32x32_modes[0]; |
| |
| ps_ed_blk_l2 = ps_ed_l2_ctb; |
| best_mode = ps_ed_blk_l2->best_mode; |
| merge_64x64 = |
| ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) + |
| (act_mode == au1_best_32x32_modes[2]) + |
| (act_mode == au1_best_32x32_modes[3]) == |
| 4); |
| if(merge_64x64 == 1) |
| best_mode = au1_best_32x32_modes[0]; |
| else |
| best_mode = ps_ed_blk_l2->best_mode; |
| /* All 32x32 costs are accumalated to 64x64 cost */ |
| ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; |
| for(i = 0; i < 4; i++) |
| { |
| ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += |
| ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; |
| } |
| |
| /* If all modes of 32x32 block is not same */ |
| if(0 == merge_64x64) |
| { |
| /*Compute CHILD cost for 32x32 */ |
| WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] + |
| au4_best_32x32_cost[2] + au4_best_32x32_cost[3]; |
| WORD32 cost = MAX_INTRA_COST_IPE; |
| |
| WORD32 best_mode_temp = 0; |
| /*Compute 64x64 cost for each mode of 32x32*/ |
| for(i = 0; i < 4; i++) |
| { |
| WORD32 mode = au1_best_32x32_modes[i]; |
| if(mode < 2) |
| mode = 26; |
| ps_cu_node->ps_parent->u1_cu_size = 64; |
| ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */ |
| ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */ |
| |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (ps_cu_node->ps_parent->u2_x0 << 1), |
| (ps_cu_node->ps_parent->u2_y0 << 1), |
| (ps_cu_node->ps_parent->u1_cu_size >> 2), |
| 0); |
| |
| ihevce_mode_eval_filtering( |
| ps_cu_node->ps_parent, |
| ps_cu_node, |
| ps_ctxt, |
| ps_curr_src, |
| mode, |
| &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
| &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
| !step2_bypass, |
| 0); |
| |
| parent_cost = ps_cu_node->ps_parent->best_cost; |
| if(cost > parent_cost) |
| { |
| cost = parent_cost; |
| best_mode_temp = ps_cu_node->ps_parent->best_mode; |
| } |
| } |
| if(cost < child_cost_64x64) |
| { |
| merge_64x64 = 1; |
| best_mode = best_mode_temp; |
| |
| /* Update 64x64 cost if CU 64x64 is chosen */ |
| ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost; |
| |
| /* Accumalate the least cost for CU 64x64 */ |
| i8_frame_acc_satd_cost = cost; |
| i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate */ |
| i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd; |
| } |
| } |
| } |
| |
| if(merge_64x64) |
| { |
| WORD32 i, j; |
| intra32_analyse_t *ps_intra32_analyse; |
| intra16_analyse_t *ps_intra16_analyse; |
| WORD32 row, col; |
| WORD32 i4_q_scale_q3_mod; |
| WORD8 i1_cu_possible_qp; |
| WORD32 i4_act_factor; |
| //ps_row_cu = ps_curr_cu; |
| ps_ctb_out->u4_cu_split_flags = 0x0; |
| ps_ed_blk_l1 = ps_ed_l1_ctb; |
| ps_ed_blk_l2 = ps_ed_l2_ctb; |
| |
| ps_l0_ipe_out_ctb->u1_split_flag = 0; |
| |
| /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/ |
| for(i = 0; i < 4; i++) |
| { |
| /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ |
| /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ |
| ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i]; |
| |
| for(j = 0; j < 4; j++) |
| { |
| /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ |
| /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ |
| ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j]; |
| ps_intra16_analyse->b1_merge_flag = 0; |
| } |
| } |
| |
| /* CU size 64x64 and fill the final cu params */ |
| //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0]; |
| //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0]; |
| //ps_row_cu->u1_cu_size = 64; |
| |
| /* Candidate mode Update */ |
| cand_mode_list[0] = best_mode; |
| if(cand_mode_list[0] > 1) |
| { |
| if(cand_mode_list[0] == 2) |
| { |
| cand_mode_list[1] = 34; |
| cand_mode_list[2] = 3; |
| } |
| else if(cand_mode_list[0] == 34) |
| { |
| cand_mode_list[1] = 2; |
| cand_mode_list[2] = 33; |
| } |
| else |
| { |
| cand_mode_list[1] = cand_mode_list[0] - 1; |
| cand_mode_list[2] = cand_mode_list[0] + 1; |
| } |
| //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode; |
| //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode; |
| } |
| else |
| { |
| cand_mode_list[0] = 0; |
| cand_mode_list[1] = 1; |
| cand_mode_list[2] = 26; |
| //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode; |
| } |
| |
| /* All 32x32 costs are accumalated to 64x64 cost */ |
| ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; |
| for(i = 0; i < 4; i++) |
| { |
| ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += |
| ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; |
| } |
| /* by default 64x64 modes are set to default values DC and Planar */ |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0]; |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1]; |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2]; |
| ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255; |
| |
| /* Update CTB mode map for the finalised CU */ |
| x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; |
| y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
| size = ps_cu_node->u1_cu_size >> 2; |
| |
| for(row = y; row < (y + size); row++) |
| { |
| for(col = x; col < (x + size); col++) |
| { |
| ps_ctxt->au1_ctb_mode_map[row][col] = best_mode; |
| } |
| } |
| |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| (ps_cu_node->u2_x0 << 1), |
| (ps_cu_node->u2_y0 << 1), |
| (ps_cu_node->u1_cu_size >> 2), |
| 1); |
| |
| /*As 64*64 has won, pick L1 32x32 qp*/ |
| //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6)); |
| //ASSERT((blk_cnt>>6) == 0); |
| ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); |
| i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_32x32_satd[0][0], |
| ps_ctxt->ld_curr_frame_32x32_log_avg[0], |
| f_strength, |
| &i4_act_factor, |
| &i4_q_scale_q3_mod, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| i8_frame_acc_satd_by_modqp_q10 = |
| (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
| i4_q_scale_q3_mod; |
| /* Increment pointers */ |
| ps_ed_blk_l1 += 64; |
| ps_ed_blk_l2 += 16; |
| //ps_row_cu++; |
| } |
| } |
| |
| //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu); |
| |
| { |
| WORD32 i4_i, i4_j; |
| WORD32 dummy; |
| WORD8 i1_cu_qp; |
| (void)i1_cu_qp; |
| /*MAM_VAR_L1*/ |
| for(i4_j = 0; i4_j < 2; i4_j++) |
| { |
| i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j]; |
| f_strength = ps_ctxt->f_strength; |
| |
| //i4_mod_factor_num = 4; |
| |
| ps_ed_blk_l1 = ps_ed_l1_ctb; |
| ps_ed_blk_l2 = ps_ed_l2_ctb; |
| //ps_row_cu = ps_curr_cu; |
| |
| /*Valid only for complete CTB */ |
| if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt)) |
| { |
| ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); |
| ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2); |
| ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2); |
| ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2); |
| |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_32x32_satd[0][0], |
| ps_ctxt->ld_curr_frame_32x32_log_avg[0], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_32x32_satd[0][1], |
| ps_ctxt->ld_curr_frame_32x32_log_avg[1], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_32x32_satd[0][2], |
| ps_ctxt->ld_curr_frame_32x32_log_avg[2], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_32x32_satd[0][3], |
| 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| |
| ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0); |
| } |
| else |
| { |
| ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024; |
| ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024; |
| ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024; |
| ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024; |
| } |
| |
| /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction |
| for the usage by ME*/ |
| |
| { |
| WORD32 pos_x_32, pos_y_32, pos; |
| //WORD32 i4_incomplete_ctb_val_8; |
| pos_x_32 = u1_curr_ctb_wdt / 16; |
| pos_y_32 = u1_curr_ctb_hgt / 16; |
| |
| pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32; |
| |
| for(i4_i = 0; i4_i < 4; i4_i++) |
| { |
| if(i4_i < pos) |
| { |
| ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2); |
| ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2); |
| ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2); |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_16x16_satd[i4_i][0], |
| ps_ctxt->ld_curr_frame_16x16_log_avg[0], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_16x16_satd[i4_i][1], |
| ps_ctxt->ld_curr_frame_16x16_log_avg[1], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_16x16_satd[i4_i][2], |
| ps_ctxt->ld_curr_frame_16x16_log_avg[2], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| } |
| else |
| { |
| /*For incomplete CTB */ |
| ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024; |
| ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024; |
| ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024; |
| } |
| } |
| } |
| |
| /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction |
| for the usage by ME*/ |
| { |
| WORD32 pos_x_16, pos_y_16, pos; |
| //WORD32 i4_incomplete_ctb_val_8; |
| pos_x_16 = u1_curr_ctb_wdt / 4; |
| pos_y_16 = u1_curr_ctb_hgt / 4; |
| |
| pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16; |
| for(i4_i = 0; i4_i < 16; i4_i++) |
| { |
| if(i4_i < pos) |
| { |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2); |
| ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2); |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[i4_i][0], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| i1_cu_qp = ihevce_cu_level_qp_mod( |
| ps_ctxt->i4_qscale, |
| ps_ed_ctb_l1->i4_8x8_satd[i4_i][1], |
| ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
| f_strength, |
| &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j], |
| &dummy, |
| ps_ctxt->ps_rc_quant_ctxt); |
| } |
| else |
| { |
| /*For incomplete CTB */ |
| ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024; |
| ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024; |
| } |
| } |
| } |
| } //for loop |
| |
| /* Accumalate the cost of ctb to the total cost */ |
| ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost; |
| ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10; |
| |
| ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost; |
| |
| /* satd and mpm bits accumalation of best cu size candiate for the ctb */ |
| ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd; |
| ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost; |
| |
| ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd; |
| } |
| |
| { |
| WORD32 ctr_8x8; |
| for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) |
| { |
| /*Accumalate activity factor for Intra and Inter*/ |
| if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] < |
| ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]) |
| { |
| ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = |
| ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; |
| } |
| else |
| { |
| ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = |
| ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; |
| } |
| |
| /*Accumalate activity factor at frame level*/ |
| ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8]; |
| } |
| } |
| return; |
| } |
| |
| WORD32 ihevce_nxn_sad_computer( |
| UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size) |
| { |
| WORD32 wd, ht, i, j; |
| WORD32 sad = 0; |
| |
| wd = trans_size; |
| ht = trans_size; |
| |
| for(i = 0; i < ht; i++) |
| { |
| for(j = 0; j < wd; j++) |
| { |
| sad += (ABS(((WORD32)pu1_inp[j] - (WORD32)pu1_ref[j]))); |
| } |
| pu1_inp += i4_inp_stride; |
| pu1_ref += i4_ref_stride; |
| } |
| |
| return sad; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_mode_eval_filtering \endif |
| * |
| * \brief |
| * Evaluates best 3 modes for the given CU size with probable modes from, |
| * early decision structure, mpm candidates and dc, planar mode |
| * |
| * \param[in] ps_cu_node : pointer to MAX cu node info buffer |
| * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer |
| * \param[in] ps_ctxt : pointer to IPE context struct |
| * \param[in] ps_curr_src : pointer to src pixels struct |
| * \param[in] best_amode : best angular mode from l1 layer or |
| from (MAX - 1) CU mode |
| * \param[in] best_costs_4x4 : pointer to 3 best cost buffer |
| * \param[in] best_modes_4x4 : pointer to 3 best mode buffer |
| * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated |
| * if 1, (MAX CU) sugested is evaluated |
| * \param[in] tu_eq_cu : indicates if tu size is same as cu or cu/2 |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_mode_eval_filtering( |
| ihevce_ipe_cu_tree_t *ps_cu_node, |
| ihevce_ipe_cu_tree_t *ps_child_cu_node, |
| ihevce_ipe_ctxt_t *ps_ctxt, |
| iv_enc_yuv_buf_t *ps_curr_src, |
| WORD32 best_amode, |
| WORD32 *best_costs_4x4, |
| UWORD8 *best_modes_4x4, |
| WORD32 step2_bypass, |
| WORD32 tu_eq_cu) |
| { |
| UWORD8 *pu1_origin, *pu1_orig; |
| WORD32 src_strd = ps_curr_src->i4_y_strd; |
| WORD32 nbr_flags; |
| nbr_avail_flags_t s_nbr; |
| WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1; |
| WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2; |
| WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2; |
| UWORD8 mode; |
| |
| WORD32 cost_ang_mode = MAX_INTRA_COST_IPE; |
| WORD32 filter_flag; |
| WORD32 cost_amode_step2[7] = { 0 }; |
| /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */ |
| WORD32 sad = 0; |
| WORD32 cu_pos_x, cu_pos_y; |
| WORD32 temp; |
| WORD32 i = 0, j, k, i_end, z; |
| //WORD32 row, col, size; |
| UWORD8 *pu1_ref; |
| WORD32 xA, yA, xB, yB; |
| WORD32 top_intra_mode; |
| WORD32 left_intra_mode; |
| UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; |
| UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; |
| |
| UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; |
| WORD32 count; |
| |
| pf_ipe_res_trans_had apf_resd_trns_had[4]; |
| |
| WORD32 cand_mode_satd_list[3]; |
| ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| ihevc_intra_pred_luma_ref_substitution_fptr = |
| ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit; |
| apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit; |
| apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit; |
| apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit; |
| |
| /* initialize modes_to_eval as zero */ |
| memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES); |
| |
| /* Compute the Parent Cost */ |
| |
| /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */ |
| pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) + |
| (ps_cu_node->u2_x0 << 3); |
| |
| /* Get position of CU within CTB at 4x4 granularity */ |
| cu_pos_x = ps_cu_node->u2_x0 << 1; |
| cu_pos_y = ps_cu_node->u2_y0 << 1; |
| |
| /* get the neighbour availability flags */ |
| ihevce_get_only_nbr_flag( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x, |
| cu_pos_y, |
| trans_size >> 2, |
| trans_size >> 2); |
| |
| /* Traverse for all 4 child blocks in the parent block */ |
| xA = (ps_cu_node->u2_x0 << 3) >> 2; |
| yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
| xB = xA + 1; |
| yB = yA - 1; |
| left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; |
| top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; |
| /* call the function which populates sad cost for all the modes */ |
| |
| ihevce_intra_populate_mode_bits_cost_bracketing( |
| top_intra_mode, |
| left_intra_mode, |
| s_nbr.u1_top_avail, |
| s_nbr.u1_left_avail, |
| ps_cu_node->u2_y0, |
| &ps_ctxt->au2_mode_bits_satd_cost[0], |
| &ps_ctxt->au2_mode_bits_satd[0], |
| ps_ctxt->i4_ol_satd_lambda, |
| cand_mode_satd_list); |
| |
| for(k = 0; k < num_tu_in_y; k++) |
| { |
| for(j = 0; j < num_tu_in_x; j++) |
| { |
| /* get the neighbour availability flags */ |
| nbr_flags = ihevce_get_nbr_intra( |
| &s_nbr, |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x + ((j) * (trans_size >> 2)), |
| cu_pos_y + ((k) * (trans_size >> 2)), |
| trans_size >> 2); |
| |
| pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); |
| |
| /* Create reference samples array */ |
| ihevc_intra_pred_luma_ref_substitution_fptr( |
| pu1_origin - src_strd - 1, |
| pu1_origin - src_strd, |
| pu1_origin - 1, |
| src_strd, |
| trans_size, |
| nbr_flags, |
| pu1_ref_orig, |
| 0); |
| |
| /* Perform reference samples filtering */ |
| ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt); |
| |
| ihevce_set_nbr_map( |
| ps_ctxt->pu1_ctb_nbr_map, |
| ps_ctxt->i4_nbr_map_strd, |
| cu_pos_x + ((j) * (trans_size >> 2)), |
| cu_pos_y + ((k) * (trans_size >> 2)), |
| (trans_size >> 2), |
| 1); |
| |
| pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); |
| pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); |
| } |
| } |
| |
| /* Revaluation for angular mode */ |
| //if(ps_ed_blk->ang_attr.mode_present == 1) |
| //if(((best_amode & 0x1) != 1)) |
| |
| { |
| WORD32 u1_trans_idx = trans_size >> 3; |
| if(trans_size == 32) |
| u1_trans_idx = 3; |
| //best_amode = ps_ed_blk->ang_attr.best_mode; |
| |
| i = 0; |
| if(!step2_bypass) |
| { |
| /* Around best level 4 angular mode, search for best level 2 mode */ |
| ASSERT((best_amode >= 2) && (best_amode <= 34)); |
| |
| if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) |
| { |
| if(best_amode >= 4) |
| ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2; |
| } |
| |
| ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode; |
| |
| if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) |
| { |
| if(best_amode <= 32) |
| ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2; |
| } |
| } |
| else |
| { |
| ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode; |
| ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode; |
| ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode; |
| ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode; |
| } |
| |
| /* Add the left and top MPM modes for computation*/ |
| |
| ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0]; |
| ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1]; |
| |
| i_end = i; |
| count = 0; |
| |
| /*Remove duplicate modes from modes_to_eval_temp[] */ |
| for(j = 0; j < i_end; j++) |
| { |
| for(k = 0; k < count; k++) |
| { |
| if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k]) |
| break; |
| } |
| if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1)) |
| { |
| ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j]; |
| count++; |
| } |
| } |
| i_end = count; |
| if(count == 0) |
| { |
| ps_ctxt->au1_modes_to_eval[0] = 26; |
| i_end = 1; |
| } |
| |
| for(i = 0; i < i_end; i++) |
| { |
| pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; |
| pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; |
| |
| mode = ps_ctxt->au1_modes_to_eval[i]; |
| ASSERT((mode >= 2) && (mode <= 34)); |
| cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; |
| filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); |
| |
| for(k = 0; k < num_tu_in_y; k++) |
| { |
| for(j = 0; j < num_tu_in_x; j++) |
| { |
| pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); |
| |
| if(0 == filter_flag) |
| pu1_ref = pu1_ref_orig; |
| else |
| pu1_ref = pu1_ref_filt; |
| |
| g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
| pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); |
| |
| if(ps_ctxt->u1_use_satd) |
| { |
| sad = apf_resd_trns_had[u1_trans_idx]( |
| pu1_origin, |
| ps_curr_src->i4_y_strd, |
| &ps_ctxt->au1_pred_samples[0], |
| trans_size, |
| NULL, |
| 0 |
| |
| ); |
| } |
| else |
| { |
| sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer( |
| pu1_origin, |
| ps_curr_src->i4_y_strd, |
| &ps_ctxt->au1_pred_samples[0], |
| trans_size, |
| trans_size); |
| } |
| |
| cost_amode_step2[i] += sad; |
| |
| pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); |
| pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); |
| } |
| } |
| } |
| best_amode = ps_ctxt->au1_modes_to_eval[0]; |
| /*Init cost indx */ |
| cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0]; |
| for(z = 0; z < i_end; z++) |
| { |
| /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/ |
| if(cost_ang_mode >= cost_amode_step2[z]) |
| { |
| if(cost_ang_mode == cost_amode_step2[z]) |
| { |
| if(best_amode > ps_ctxt->au1_modes_to_eval[z]) |
| best_amode = ps_ctxt->au1_modes_to_eval[z]; |
| } |
| else |
| { |
| best_amode = ps_ctxt->au1_modes_to_eval[z]; |
| } |
| cost_ang_mode = cost_amode_step2[z]; |
| } |
| } |
| |
| /*Modify mode bits for the angular modes */ |
| } |
| |
| { |
| /* Step - I modification */ |
| ASSERT((best_amode >= 2) && (best_amode <= 34)); |
| i_end = 0; |
| z = 0; |
| |
| /* Around best level 3 angular mode, search for best level 1 mode */ |
| ps_ctxt->au1_modes_to_eval[i_end++] = 0; |
| ps_ctxt->au1_modes_to_eval[i_end++] = 1; |
| |
| if(best_amode != 2) |
| ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1; |
| |
| ps_ctxt->au1_modes_to_eval[i_end++] = best_amode; |
| |
| if(best_amode != 34) |
| ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1; |
| |
| /* Inserting step_2's best mode at last to avoid |
| recalculation of it's SATD cost */ |
| |
| //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD |
| //cost_amode_step2[i_end] = cost_ang_mode; |
| |
| /*best_sad[i_end] = cost_ang_mode |
| - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */ |
| |
| cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */ |
| |
| for(i = 0; i < i_end; i++) |
| { |
| WORD32 u1_trans_idx = trans_size >> 3; |
| if(trans_size == 32) |
| u1_trans_idx = 3; |
| pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; |
| pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; |
| |
| /*best_sad[i] = 0; //See NOTE_A01 above */ |
| mode = ps_ctxt->au1_modes_to_eval[i]; |
| cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; |
| filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); |
| |
| for(k = 0; k < num_tu_in_y; k++) |
| { |
| for(j = 0; j < num_tu_in_x; j++) |
| { |
| pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); |
| |
| if(0 == filter_flag) |
| pu1_ref = pu1_ref_orig; |
| else |
| pu1_ref = pu1_ref_filt; |
| |
| g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
| pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); |
| |
| //if(trans_size != 4) |
| { |
| sad = apf_resd_trns_had[u1_trans_idx]( |
| pu1_origin, |
| ps_curr_src->i4_y_strd, |
| &ps_ctxt->au1_pred_samples[0], |
| trans_size, |
| NULL, |
| 0); |
| } |
| |
| /*accumualting SATD though name says it is sad*/ |
| cost_amode_step2[i] += sad; |
| /*best_sad[i] +=sad; //See NOTE_A01 above */ |
| pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); |
| pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); |
| } |
| } |
| } |
| /* Updating i_end for the step_2's inserted mode*/ |
| // i_end++; |
| |
| /* Arrange the reference array in ascending order */ |
| |
| for(i = 0; i < (i_end - 1); i++) |
| { |
| for(j = i + 1; j < i_end; j++) |
| { |
| if(cost_amode_step2[i] > cost_amode_step2[j]) |
| { |
| temp = cost_amode_step2[i]; |
| cost_amode_step2[i] = cost_amode_step2[j]; |
| cost_amode_step2[j] = temp; |
| |
| temp = modes_4x4[i]; |
| modes_4x4[i] = modes_4x4[j]; |
| modes_4x4[j] = temp; |
| } |
| } |
| } |
| |
| /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/ |
| best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; |
| cost_ang_mode = cost_amode_step2[0]; |
| ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode]; |
| ps_cu_node->best_cost = cost_amode_step2[0]; |
| ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; |
| ps_cu_node->best_satd = |
| ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode]; |
| |
| /*Accumalate best mode bits cost for RC*/ |
| ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode]; |
| |
| /* Store the best three candidates */ |
| for(i = 0; i < 3; i++) |
| { |
| best_costs_4x4[i] = cost_amode_step2[i]; |
| best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]]; |
| } |
| } |
| |
| return; |
| } |