| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ****************************************************************************** |
| * @file hme_refine.c |
| * |
| * @brief |
| * Contains the implementation of the refinement layer searches and related |
| * functionality like CU merge. |
| * |
| * @author |
| * Ittiam |
| * |
| * |
| * List of Functions |
| * |
| * |
| ****************************************************************************** |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <math.h> |
| #include <limits.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_multi_thrd_funcs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_bs_compute_ctb.h" |
| #include "ihevce_global_tables.h" |
| #include "ihevce_dep_mngr_interface.h" |
| #include "hme_datatype.h" |
| #include "hme_interface.h" |
| #include "hme_common_defs.h" |
| #include "hme_defs.h" |
| #include "ihevce_me_instr_set_router.h" |
| #include "hme_globals.h" |
| #include "hme_utils.h" |
| #include "hme_coarse.h" |
| #include "hme_fullpel.h" |
| #include "hme_subpel.h" |
| #include "hme_refine.h" |
| #include "hme_err_compute.h" |
| #include "hme_common_utils.h" |
| #include "hme_search_algo.h" |
| #include "ihevce_stasino_helpers.h" |
| #include "ihevce_common_utils.h" |
| |
| /*****************************************************************************/ |
| /* Globals */ |
| /*****************************************************************************/ |
| |
/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
| UWORD8 gau1_raster_scan_to_ctb[4][4] = { |
| { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 } |
| }; |
| |
| /*****************************************************************************/ |
/*  Extern Function declaration                                              */
| /*****************************************************************************/ |
| extern ctb_boundary_attrs_t * |
| get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt); |
| |
| typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)( |
| search_node_t *ps_search_node, |
| layer_ctxt_t *ps_curr_layer, |
| layer_ctxt_t *ps_coarse_layer, |
| S32 i4_pos_x, |
| S32 i4_pos_y, |
| S08 i1_ref_id, |
| S32 i4_result_id); |
| |
| typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)( |
| search_node_t *ps_search_node, |
| layer_ctxt_t *ps_curr_layer, |
| layer_ctxt_t *ps_coarse_layer, |
| S32 i4_pos_x, |
| S32 i4_pos_y, |
| S32 i4_num_act_ref_l0, |
| U08 u1_pred_dir, |
| U08 u1_default_ref_id, |
| S32 i4_result_id); |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| void ihevce_no_wt_copy( |
| coarse_me_ctxt_t *ps_ctxt, |
| layer_ctxt_t *ps_curr_layer, |
| pu_t *ps_pu, |
| UWORD8 *pu1_temp_pred, |
| WORD32 temp_stride, |
| WORD32 blk_x, |
| WORD32 blk_y) |
| { |
| UWORD8 *pu1_ref; |
| WORD32 ref_stride, ref_offset; |
| WORD32 row, col, i4_tmp; |
| |
| ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1)); |
| |
| if(ps_pu->b2_pred_mode == PRED_L0) |
| { |
| WORD8 i1_ref_idx; |
| |
| i1_ref_idx = ps_pu->mv.i1_l0_ref_idx; |
| pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx]; |
| |
| ref_stride = ps_curr_layer->i4_inp_stride; |
| |
| ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride; |
| ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx; |
| |
| pu1_ref += ref_offset; |
| |
| for(row = 0; row < temp_stride; row++) |
| { |
| for(col = 0; col < temp_stride; col++) |
| { |
| i4_tmp = pu1_ref[col]; |
| pu1_temp_pred[col] = CLIP_U8(i4_tmp); |
| } |
| |
| pu1_ref += ref_stride; |
| pu1_temp_pred += temp_stride; |
| } |
| } |
| else |
| { |
| WORD8 i1_ref_idx; |
| |
| i1_ref_idx = ps_pu->mv.i1_l1_ref_idx; |
| pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx]; |
| |
| ref_stride = ps_curr_layer->i4_inp_stride; |
| |
| ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride; |
| ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx; |
| |
| pu1_ref += ref_offset; |
| |
| for(row = 0; row < temp_stride; row++) |
| { |
| for(col = 0; col < temp_stride; col++) |
| { |
| i4_tmp = pu1_ref[col]; |
| pu1_temp_pred[col] = CLIP_U8(i4_tmp); |
| } |
| |
| pu1_ref += ref_stride; |
| pu1_temp_pred += temp_stride; |
| } |
| } |
| } |
| |
| static WORD32 hme_add_clustered_mvs_as_merge_cands( |
| cluster_data_t *ps_cluster_base, |
| search_node_t *ps_merge_cand, |
| range_prms_t **pps_range_prms, |
| U08 *pu1_refid_to_pred_dir_list, |
| WORD32 i4_num_clusters, |
| U08 u1_pred_dir) |
| { |
| WORD32 i, j, k; |
| WORD32 i4_num_cands_added = 0; |
| WORD32 i4_num_mvs_in_cluster; |
| |
| for(i = 0; i < i4_num_clusters; i++) |
| { |
| cluster_data_t *ps_data = &ps_cluster_base[i]; |
| |
| if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id]) |
| { |
| i4_num_mvs_in_cluster = ps_data->num_mvs; |
| |
| for(j = 0; j < i4_num_mvs_in_cluster; j++) |
| { |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx; |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy; |
| ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id; |
| |
| CLIP_MV_WITHIN_RANGE( |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx, |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy, |
| pps_range_prms[ps_data->ref_id], |
| 0, |
| 0, |
| 0); |
| |
| for(k = 0; k < i4_num_cands_added; k++) |
| { |
| if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) && |
| (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) && |
| (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id)) |
| { |
| break; |
| } |
| } |
| |
| if(k == i4_num_cands_added) |
| { |
| i4_num_cands_added++; |
| } |
| } |
| } |
| } |
| |
| return i4_num_cands_added; |
| } |
| |
| static WORD32 hme_add_me_best_as_merge_cands( |
| search_results_t **pps_child_data_array, |
| inter_cu_results_t *ps_8x8cu_results, |
| search_node_t *ps_merge_cand, |
| range_prms_t **pps_range_prms, |
| U08 *pu1_refid_to_pred_dir_list, |
| S08 *pi1_past_list, |
| S08 *pi1_future_list, |
| BLK_SIZE_T e_blk_size, |
| ME_QUALITY_PRESETS_T e_quality_preset, |
| S32 i4_num_cands_added, |
| U08 u1_pred_dir) |
| { |
| WORD32 i, j, k; |
| WORD32 i4_max_cands_to_add; |
| |
| WORD32 i4_result_id = 0; |
| |
| ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size)); |
| ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size)); |
| ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size)); |
| ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size)); |
| |
| switch(e_quality_preset) |
| { |
| case ME_PRISTINE_QUALITY: |
| { |
| i4_max_cands_to_add = MAX_MERGE_CANDTS; |
| |
| break; |
| } |
| case ME_HIGH_QUALITY: |
| { |
| /* All 4 children are split and each grandchild contributes an MV */ |
| /* and 2 best results per grandchild */ |
| i4_max_cands_to_add = 4 * 4 * 2; |
| |
| break; |
| } |
| case ME_MEDIUM_SPEED: |
| { |
| i4_max_cands_to_add = 4 * 2 * 2; |
| |
| break; |
| } |
| case ME_HIGH_SPEED: |
| case ME_XTREME_SPEED: |
| case ME_XTREME_SPEED_25: |
| { |
| i4_max_cands_to_add = 4 * 2 * 1; |
| |
| break; |
| } |
| } |
| |
| while(i4_result_id < 4) |
| { |
| for(i = 0; i < 4; i++) |
| { |
| inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results; |
| inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2]; |
| |
| if(!pps_child_data_array[i]->u1_split_flag) |
| { |
| part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id]; |
| |
| if(ps_child_data->u1_num_best_results <= i4_result_id) |
| { |
| continue; |
| } |
| |
| if(ps_data->as_pu_results->pu.b1_intra_flag) |
| { |
| continue; |
| } |
| |
| for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++) |
| { |
| mv_t *ps_mv; |
| |
| S08 i1_ref_idx; |
| |
| pu_t *ps_pu = &ps_data->as_pu_results[j].pu; |
| |
| if(u1_pred_dir != |
| ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode)) |
| { |
| continue; |
| } |
| |
| if(u1_pred_dir) |
| { |
| ps_mv = &ps_pu->mv.s_l1_mv; |
| i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx]; |
| } |
| else |
| { |
| ps_mv = &ps_pu->mv.s_l0_mv; |
| i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx]; |
| } |
| |
| if(-1 == i1_ref_idx) |
| { |
| continue; |
| } |
| |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx; |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy; |
| ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx; |
| |
| CLIP_MV_WITHIN_RANGE( |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx, |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy, |
| pps_range_prms[i1_ref_idx], |
| 0, |
| 0, |
| 0); |
| |
| for(k = 0; k < i4_num_cands_added; k++) |
| { |
| if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) && |
| (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) && |
| (ps_merge_cand[k].i1_ref_idx == i1_ref_idx)) |
| { |
| break; |
| } |
| } |
| |
| if(k == i4_num_cands_added) |
| { |
| i4_num_cands_added++; |
| |
| if(i4_max_cands_to_add <= i4_num_cands_added) |
| { |
| return i4_num_cands_added; |
| } |
| } |
| } |
| } |
| else |
| { |
| for(j = 0; j < 4; j++) |
| { |
| mv_t *ps_mv; |
| |
| S08 i1_ref_idx; |
| |
| part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results; |
| pu_t *ps_pu = &ps_data->as_pu_results[0].pu; |
| |
| ASSERT(ps_data->u1_part_type == PRT_2Nx2N); |
| |
| if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id) |
| { |
| continue; |
| } |
| |
| if(ps_data->as_pu_results->pu.b1_intra_flag) |
| { |
| continue; |
| } |
| |
| if(u1_pred_dir != |
| ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode)) |
| { |
| continue; |
| } |
| |
| if(u1_pred_dir) |
| { |
| ps_mv = &ps_pu->mv.s_l1_mv; |
| i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx]; |
| } |
| else |
| { |
| ps_mv = &ps_pu->mv.s_l0_mv; |
| i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx]; |
| } |
| |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx; |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy; |
| ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx; |
| |
| CLIP_MV_WITHIN_RANGE( |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx, |
| ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy, |
| pps_range_prms[i1_ref_idx], |
| 0, |
| 0, |
| 0); |
| |
| for(k = 0; k < i4_num_cands_added; k++) |
| { |
| if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) && |
| (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) && |
| (ps_merge_cand[k].i1_ref_idx == i1_ref_idx)) |
| { |
| break; |
| } |
| } |
| |
| if(k == i4_num_cands_added) |
| { |
| i4_num_cands_added++; |
| |
| if(i4_max_cands_to_add <= i4_num_cands_added) |
| { |
| return i4_num_cands_added; |
| } |
| } |
| } |
| } |
| } |
| |
| i4_result_id++; |
| } |
| |
| return i4_num_cands_added; |
| } |
| |
| WORD32 hme_add_cands_for_merge_eval( |
| ctb_cluster_info_t *ps_cluster_info, |
| search_results_t **pps_child_data_array, |
| inter_cu_results_t *ps_8x8cu_results, |
| range_prms_t **pps_range_prms, |
| search_node_t *ps_merge_cand, |
| U08 *pu1_refid_to_pred_dir_list, |
| S08 *pi1_past_list, |
| S08 *pi1_future_list, |
| ME_QUALITY_PRESETS_T e_quality_preset, |
| BLK_SIZE_T e_blk_size, |
| U08 u1_pred_dir, |
| U08 u1_blk_id) |
| { |
| WORD32 i4_num_cands_added = 0; |
| |
| if(ME_PRISTINE_QUALITY == e_quality_preset) |
| { |
| cluster_data_t *ps_cluster_primo; |
| |
| WORD32 i4_num_clusters; |
| |
| if(BLK_32x32 == e_blk_size) |
| { |
| ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data; |
| i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters; |
| } |
| else |
| { |
| ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data; |
| i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters; |
| } |
| |
| i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands( |
| ps_cluster_primo, |
| ps_merge_cand, |
| pps_range_prms, |
| pu1_refid_to_pred_dir_list, |
| i4_num_clusters, |
| u1_pred_dir); |
| } |
| |
| i4_num_cands_added = hme_add_me_best_as_merge_cands( |
| pps_child_data_array, |
| ps_8x8cu_results, |
| ps_merge_cand, |
| pps_range_prms, |
| pu1_refid_to_pred_dir_list, |
| pi1_past_list, |
| pi1_future_list, |
| e_blk_size, |
| e_quality_preset, |
| i4_num_cands_added, |
| u1_pred_dir); |
| |
| return i4_num_cands_added; |
| } |
| |
| /** |
| ******************************************************************************** |
* @fn WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
*                                  S32 i4_search_idx,
*                                  S32 i4_best_part_type,
*                                  S32 i4_is_vert, ...)
| * |
| * @brief Given a target partition orientation in the merged CU, and the |
| * partition type of most likely partition this fxn picks up |
| * candidates from the 4 constituent CUs and does refinement search |
| * to identify best results for the merge CU across active partitions |
| * |
| * @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of |
| * these params, the search result structure is also derived and |
| * updated during the search |
| * |
| * @param[in] i1_ref_idx : ID of the buffer within the search results to update. |
| * Will be 0 if all refidx collapsed to one buf, else it'll be 0/1 |
| * |
| * @param[in] i4_best_part_type : partition type of potential partition in the |
| * merged CU, -1 if the merge process has not yet been able to |
| * determine this. |
| * |
| * @param[in] i4_is_vert : Whether target partition of merged CU is vertical |
| * orientation or horizontal orientation. |
| * |
| * @return Number of merge candidates |
| ******************************************************************************** |
| */ |
| WORD32 hme_pick_eval_merge_candts( |
| hme_merge_prms_t *ps_merge_prms, |
| hme_subpel_prms_t *ps_subpel_prms, |
| S32 i4_search_idx, |
| S32 i4_best_part_type, |
| S32 i4_is_vert, |
| wgt_pred_ctxt_t *ps_wt_inp_prms, |
| S32 i4_frm_qstep, |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list, |
| ihevce_me_optimised_function_list_t *ps_me_optimised_function_list) |
| { |
| S32 x_off, y_off; |
| search_node_t *ps_search_node; |
| S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; |
| S32 i4_num_valid_parts; |
| pred_ctxt_t *ps_pred_ctxt; |
| |
| search_node_t as_merge_unique_node[MAX_MERGE_CANDTS]; |
| S32 num_unique_nodes_cu_merge = 0; |
| |
| search_results_t *ps_search_results = ps_merge_prms->ps_results_merge; |
| CU_SIZE_T e_cu_size = ps_search_results->e_cu_size; |
| S32 i4_part_mask = ps_search_results->i4_part_mask; |
| |
| search_results_t *aps_child_results[4]; |
| layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt; |
| |
| S32 i4_ref_stride, i, j; |
| result_upd_prms_t s_result_prms; |
| |
| BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size]; |
| S32 i4_offset; |
| |
| /*************************************************************************/ |
| /* Function pointer for SAD/SATD, array and prms structure to pass to */ |
| /* This function */ |
| /*************************************************************************/ |
| PF_SAD_FXN_T pf_err_compute; |
| S32 ai4_sad_grid[9][17]; |
| err_prms_t s_err_prms; |
| |
| /*************************************************************************/ |
| /* Allowed MV RANGE */ |
| /*************************************************************************/ |
| range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range; |
| PF_INTERP_FXN_T pf_qpel_interp; |
| PF_MV_COST_FXN pf_mv_cost_compute; |
| WORD32 pred_lx; |
| U08 *apu1_hpel_ref[4]; |
| |
| interp_prms_t s_interp_prms; |
| S32 i4_interp_buf_id; |
| |
| S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off; |
| S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off; |
| |
| /* Sanity checks */ |
| ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32)); |
| |
| s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list; |
| |
| /* Initialize all the ptrs to child CUs for merge decision */ |
| aps_child_results[0] = ps_merge_prms->ps_results_tl; |
| aps_child_results[1] = ps_merge_prms->ps_results_tr; |
| aps_child_results[2] = ps_merge_prms->ps_results_bl; |
| aps_child_results[3] = ps_merge_prms->ps_results_br; |
| |
| num_unique_nodes_cu_merge = 0; |
| |
| pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; |
| |
| if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset) |
| { |
| num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval( |
| ps_merge_prms->ps_cluster_info, |
| aps_child_results, |
| ps_merge_prms->ps_8x8_cu_results, |
| pps_range_prms, |
| as_merge_unique_node, |
| ps_search_results->pu1_is_past, |
| ps_merge_prms->pi1_past_list, |
| ps_merge_prms->pi1_future_list, |
| ps_merge_prms->e_quality_preset, |
| e_blk_size, |
| i4_search_idx, |
| (ps_merge_prms->ps_results_merge->u1_x_off >> 5) + |
| (ps_merge_prms->ps_results_merge->u1_y_off >> 4)); |
| } |
| else |
| { |
| /*************************************************************************/ |
| /* Populate the list of unique search nodes in the child CUs for merge */ |
| /* evaluation */ |
| /*************************************************************************/ |
| for(i = 0; i < 4; i++) |
| { |
| search_node_t s_search_node; |
| |
| PART_TYPE_T e_part_type; |
| PART_ID_T e_part_id; |
| |
| WORD32 part_num; |
| |
| search_results_t *ps_child = aps_child_results[i]; |
| |
| if(ps_child->ps_cu_results->u1_num_best_results) |
| { |
| if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) && |
| (1 == ps_child->ps_cu_results->u1_num_best_results))) |
| { |
| e_part_type = |
| (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type; |
| |
| ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS); |
| |
| /* Insert mvs of NxN partitions. */ |
| for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)]; |
| part_num++) |
| { |
| e_part_id = ge_part_type_to_part_id[e_part_type][part_num]; |
| |
| if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1) |
| { |
| s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id]; |
| if(s_search_node.s_mv.i2_mvx != INTRA_MV) |
| { |
| CLIP_MV_WITHIN_RANGE( |
| s_search_node.s_mv.i2_mvx, |
| s_search_node.s_mv.i2_mvy, |
| pps_range_prms[s_search_node.i1_ref_idx], |
| 0, |
| 0, |
| 0); |
| |
| INSERT_NEW_NODE_NOMAP( |
| as_merge_unique_node, |
| num_unique_nodes_cu_merge, |
| s_search_node, |
| 1); |
| } |
| } |
| } |
| } |
| } |
| else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)] |
| .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) && |
| (1 == ps_merge_prms->ps_results_grandchild[(i << 2)] |
| .ps_cu_results->u1_num_best_results))) |
| { |
| search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)]; |
| |
| for(j = 0; j < 4; j++) |
| { |
| e_part_type = (PART_TYPE_T)ps_results_root[j] |
| .ps_cu_results->ps_best_results[0] |
| .u1_part_type; |
| |
| ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS); |
| |
| /* Insert mvs of NxN partitions. */ |
| for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)]; |
| part_num++) |
| { |
| e_part_id = ge_part_type_to_part_id[e_part_type][part_num]; |
| |
| if((ps_results_root[j] |
| .aps_part_results[i4_search_idx][e_part_id] |
| ->i1_ref_idx != -1) && |
| (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu |
| .b1_intra_flag)) |
| { |
| s_search_node = |
| *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id]; |
| if(s_search_node.s_mv.i2_mvx != INTRA_MV) |
| { |
| CLIP_MV_WITHIN_RANGE( |
| s_search_node.s_mv.i2_mvx, |
| s_search_node.s_mv.i2_mvy, |
| pps_range_prms[s_search_node.i1_ref_idx], |
| 0, |
| 0, |
| 0); |
| |
| INSERT_NEW_NODE_NOMAP( |
| as_merge_unique_node, |
| num_unique_nodes_cu_merge, |
| s_search_node, |
| 1); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| if(0 == num_unique_nodes_cu_merge) |
| { |
| return 0; |
| } |
| |
| /*************************************************************************/ |
| /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/ |
| /* fixed through this subpel refinement for this partition. */ |
| /* Note, we do not enable grid sads since one pt is evaluated per node */ |
| /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled. */ |
| /*************************************************************************/ |
| i4_part_mask = ps_search_results->i4_part_mask; |
| |
| /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */ |
| if(ps_subpel_prms->i4_use_satd) |
| { |
| if(BLK_32x32 == e_blk_size) |
| { |
| pf_err_compute = hme_evalsatd_pt_pu_32x32; |
| } |
| else |
| { |
| pf_err_compute = hme_evalsatd_pt_pu_64x64; |
| } |
| } |
| else |
| { |
| pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM; |
| } |
| |
| i4_ref_stride = ps_curr_layer->i4_rec_stride; |
| |
| x_off = ps_merge_prms->ps_results_tl->u1_x_off; |
| y_off = ps_merge_prms->ps_results_tl->u1_y_off; |
| i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride); |
| |
| /*************************************************************************/ |
| /* This array stores the ids of the partitions whose */ |
| /* SADs are updated. Since the partitions whose SADs are updated may not */ |
| /* be in contiguous order, we supply another level of indirection. */ |
| /*************************************************************************/ |
| i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids); |
| |
| /* Initialize result params used for partition update */ |
| s_result_prms.pf_mv_cost_compute = NULL; |
| s_result_prms.ps_search_results = ps_search_results; |
| s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids; |
| s_result_prms.i1_ref_idx = i4_search_idx; |
| s_result_prms.i4_part_mask = i4_part_mask; |
| s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0]; |
| s_result_prms.i4_grid_mask = 1; |
| |
| /* One time Initialization of error params used for SAD/SATD compute */ |
| s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride; |
| s_err_prms.i4_ref_stride = i4_ref_stride; |
| s_err_prms.i4_part_mask = (ENABLE_2Nx2N); |
| s_err_prms.i4_grid_mask = 1; |
| s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0]; |
| s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; |
| s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; |
| s_err_prms.i4_step = 1; |
| |
| /*************************************************************************/ |
| /* One time preparation of non changing interpolation params. */ |
| /*************************************************************************/ |
| s_interp_prms.i4_ref_stride = i4_ref_stride; |
| s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; |
| s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; |
| s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem; |
| s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size]; |
| i4_interp_buf_id = 0; |
| |
| pf_qpel_interp = ps_subpel_prms->pf_qpel_interp; |
| |
| /***************************************************************************/ |
| /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */ |
| /* results */ |
| /***************************************************************************/ |
| for(i = 0; i < num_unique_nodes_cu_merge; i++) |
| { |
| WORD8 i1_ref_idx; |
| ps_search_node = &as_merge_unique_node[i]; |
| |
| /*********************************************************************/ |
| /* Compute the base pointer for input, interpolated buffers */ |
| /* The base pointers point as follows: */ |
| /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */ |
| /* To these, we need to add the offset of the current node */ |
| /*********************************************************************/ |
| i1_ref_idx = ps_search_node->i1_ref_idx; |
| apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset; |
| apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset; |
| apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset; |
| apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset; |
| |
| s_interp_prms.ppu1_ref = &apu1_hpel_ref[0]; |
| |
| pf_qpel_interp( |
| &s_interp_prms, |
| ps_search_node->s_mv.i2_mvx, |
| ps_search_node->s_mv.i2_mvy, |
| i4_interp_buf_id); |
| |
| pred_lx = i4_search_idx; |
| ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
| |
| s_result_prms.u1_pred_lx = pred_lx; |
| s_result_prms.ps_search_node_base = ps_search_node; |
| s_err_prms.pu1_inp = |
| ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride; |
| s_err_prms.pu1_ref = s_interp_prms.pu1_final_out; |
| s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride; |
| |
| /* Carry out the SAD/SATD. This call also does the TU RECURSION. |
| Here the tu recursion logic is restricted with the size of the PU*/ |
| pf_err_compute(&s_err_prms); |
| |
| if(ps_subpel_prms->u1_is_cu_noisy && |
| ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier) |
| { |
| ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts( |
| s_err_prms.pu1_ref, |
| s_err_prms.i4_ref_stride, |
| ai4_valid_part_ids, |
| ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX, |
| ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, |
| s_err_prms.pi4_sad_grid, |
| ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier, |
| ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx], |
| ps_wt_inp_prms->ai4_shift_val[i1_ref_idx], |
| i4_num_valid_parts, |
| ps_wt_inp_prms->wpred_log_wdc, |
| (BLK_32x32 == e_blk_size) ? 32 : 64); |
| } |
| |
| /* Update the mv's */ |
| s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx; |
| s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy; |
| |
| /* Update best results */ |
| hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms); |
| } |
| |
| /************************************************************************/ |
| /* Update mv cost and total cost for each valid partition in the CU */ |
| /************************************************************************/ |
| for(i = 0; i < TOT_NUM_PARTS; i++) |
| { |
| if(i4_part_mask & (1 << i)) |
| { |
| WORD32 j; |
| WORD32 i4_mv_cost; |
| |
| ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i]; |
| |
| for(j = 0; |
| j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge); |
| j++) |
| { |
| if(ps_search_node->i1_ref_idx != -1) |
| { |
| pred_lx = i4_search_idx; |
| ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
| |
| /* Prediction context should now deal with qpel units */ |
| HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL); |
| |
| ps_search_node->u1_subpel_done = 1; |
| ps_search_node->u1_is_avail = 1; |
| |
| i4_mv_cost = |
| pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL); |
| |
| ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad; |
| ps_search_node->i4_mv_cost = i4_mv_cost; |
| |
| ps_search_node++; |
| } |
| } |
| } |
| } |
| |
| return num_unique_nodes_cu_merge; |
| } |
| |
| #define CU_MERGE_MAX_INTRA_PARTS 4 |
| |
| /** |
| ******************************************************************************** |
| * @fn hme_try_merge_high_speed |
| * |
* @brief    Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
            entity or with partitions for high speed preset
| * |
| * @param[in,out] hme_merge_prms_t: Params for CU merge |
| * |
| * @return MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT) |
| ******************************************************************************** |
| */ |
CU_MERGE_RESULT_T hme_try_merge_high_speed(
    me_ctxt_t *ps_thrd_ctxt,
    me_frm_ctxt_t *ps_ctxt,
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
    hme_subpel_prms_t *ps_subpel_prms,
    hme_merge_prms_t *ps_merge_prms,
    inter_pu_results_t *ps_pu_results,
    pu_result_t *ps_pu_result)
{
    search_results_t *ps_results_tl, *ps_results_tr;
    search_results_t *ps_results_bl, *ps_results_br;

    S32 i;
    S32 i4_search_idx;
    S32 i4_cost_parent;
    S32 intra_cu_size;
    /* Per-partition source sigma accumulators; 17 = total partitions in a CU */
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];

    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;

    /* NxN partitions are not evaluated for the merged (parent) CU */
    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
    S32 is_vert = 0, i4_best_part_type = -1;
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
    S32 i4_cost_children = 0;
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
    S32 i4_num_merge_cands_evaluated = 0;
    U08 u1_x_off = ps_results_merge->u1_x_off;
    U08 u1_y_off = ps_results_merge->u1_y_off;
    /* Raster index of the 32x32 block inside the CTB: 2*(y/32) + (x/32) */
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);

    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
    ps_results_tl = ps_merge_prms->ps_results_tl;
    ps_results_tr = ps_merge_prms->ps_results_tr;
    ps_results_bl = ps_merge_prms->ps_results_bl;
    ps_results_br = ps_merge_prms->ps_results_br;

    /* Speed presets progressively prune the partition set: XS drops AMP, */
    /* XS25 additionally drops SMP */
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
    {
        i4_part_mask &= ~ENABLE_AMP;
    }

    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
    {
        i4_part_mask &= ~ENABLE_AMP;

        i4_part_mask &= ~ENABLE_SMP;
    }

    ps_merge_prms->i4_num_pred_dir_actual = 0;

    /*************************************************************************/
    /* The logic for High speed CU merge goes as follows:                    */
    /*                                                                       */
    /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
    /*    exceed 7                                                           */
    /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
    /*    are identical                                                      */
    /* 3. Find the all unique mvs of best partitions of children CUs and     */
    /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
    /*    best parent cost is lower than sum of the best children costs      */
    /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
    /*                                                                       */
    /*************************************************************************/

    /* Count the number of best partitions in child CUs, early exit if > 7 */
    /* Also accumulates i4_cost_children = sum of the children's best costs */
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
       (CU_32x32 == ps_results_merge->e_cu_size))
    {
        S32 num_parts_in_32x32 = 0;
        WORD32 i4_part_type;

        if(ps_results_tl->u1_split_flag)
        {
            /* Child itself split: counts as 4 parts; cost is the sum of its */
            /* four sub-blocks' best results */
            num_parts_in_32x32 += 4;

#define COST_INTERCHANGE 0
            i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
                               ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_tr->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_bl->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        if(ps_results_br->u1_split_flag)
        {
            num_parts_in_32x32 += 4;

            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
                                ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
        }
        else
        {
            i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
            i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
        }

        /* Early exit 1: too many constituent partitions => don't merge */
        if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
        {
            return CU_SPLIT;
        }

        if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
           (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
        {
            return CU_SPLIT;
        }
    }

    /* Accumulate intra percentage before merge for early CU_SPLIT decision */
    /* Note : Each intra part represent a NxN unit of the children CUs */
    /* This is essentially 1/16th of the CUsize under consideration for merge */
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
    {
        /* PRISTINE: intra count comes from the IPE-derived CU tree; forced */
        /* to 16 (fully intra) when inter evaluation is disabled for the node */
        if(CU_64x64 == ps_results_merge->e_cu_size)
        {
            i4_intra_parts =
                (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
                    ? 16
                    : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
        }
        else
        {
            /* Offsets are 32-aligned, so the switch value is 0..3 (TL/TR/BL/BR) */
            switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
            {
            case 0:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_tl->u1_intra_eval_enable);

                break;
            }
            case 1:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_tr->u1_intra_eval_enable);

                break;
            }
            case 2:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_bl->u1_intra_eval_enable);

                break;
            }
            case 3:
            {
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
                                       ->u1_inter_eval_enable)
                                     ? 16
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
                                            ->ps_child_node_br->u1_intra_eval_enable);

                break;
            }
            }
        }
    }
    else
    {
        /* Non-pristine: count intra NxN units from the children's actual */
        /* best results (4 units per non-split child, 1 per 8x8 sub-block) */
        for(i = 0; i < 4; i++)
        {
            search_results_t *ps_results =
                (i == 0) ? ps_results_tl
                         : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));

            part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];

            if(ps_results->u1_split_flag)
            {
                U08 u1_x_off = ps_results->u1_x_off;
                U08 u1_y_off = ps_results->u1_y_off;
                U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
                                      2;

                /* Special case to handle 8x8 CUs when 16x16 is split */
                ASSERT(ps_results->e_cu_size == CU_16x16);

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;

                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];

                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
                    i4_intra_parts += 1;
            }
            else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
            {
                i4_intra_parts += 4;
            }
        }
    }

    /* Determine the max intra CU size indicated by IPE */
    intra_cu_size = CU_64x64;
    if(ps_cur_ipe_ctb->u1_split_flag)
    {
        intra_cu_size = CU_32x32;
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
        {
            intra_cu_size = CU_16x16;
        }
    }

    /* If the children are dominantly/fully intra, try representing the     */
    /* parent as a single intra 2Nx2N CU instead of doing an inter merge    */
    if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
        (intra_cu_size < ps_results_merge->e_cu_size) &&
        (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
       (i4_intra_parts == 16))
    {
        S32 i4_merge_outcome;

        /* Intra merge only valid if IPE did not split at this level and    */
        /* the CU is valid (for 32x32); pristine preset always allows it    */
        i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
                               ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
                                  ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
                               : (!ps_cur_ipe_ctb->u1_split_flag);

        i4_merge_outcome = i4_merge_outcome ||
                           (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);

        i4_merge_outcome = i4_merge_outcome &&
                           !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);

        if(i4_merge_outcome)
        {
            /* Seed a single intra 2Nx2N best result for the parent CU, with */
            /* the IPE intra cost and INTRA_MV marker mvs */
            inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
            part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
            pu_t *ps_pu = &ps_best_result->as_pu_results->pu;

            ps_cu_results->u1_num_best_results = 1;
            ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
            ps_cu_results->u1_x_off = u1_x_off;
            ps_cu_results->u1_y_off = u1_y_off;

            ps_best_result->u1_part_type = PRT_2Nx2N;
            ps_best_result->ai4_tu_split_flag[0] = 0;
            ps_best_result->ai4_tu_split_flag[1] = 0;
            ps_best_result->ai4_tu_split_flag[2] = 0;
            ps_best_result->ai4_tu_split_flag[3] = 0;
            ps_best_result->i4_tot_cost =
                (CU_64x64 == ps_results_merge->e_cu_size)
                    ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
                    : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];

            ps_pu->b1_intra_flag = 1;
            /* PU position in 4x4 units */
            ps_pu->b4_pos_x = u1_x_off >> 2;
            ps_pu->b4_pos_y = u1_y_off >> 2;
            /* NOTE(review): width/height appear to be encoded in 4-pel units
               minus 1, derived from e_cu_size — confirm against pu_t docs */
            ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
            ps_pu->b4_ht = ps_pu->b4_wd;
            ps_pu->mv.i1_l0_ref_idx = -1;
            ps_pu->mv.i1_l1_ref_idx = -1;
            ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
            ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
            ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
            ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;

            return CU_MERGED;
        }
        else
        {
            return CU_SPLIT;
        }
    }

    /* Some (but not dominant) intra present: restrict merge eval to 2Nx2N */
    if(i4_intra_parts)
    {
        i4_part_mask = ENABLE_2Nx2N;
    }

    /* Reset with the max possible pred directions; actual ref count is     */
    /* restored immediately after the reset                                 */
    ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;

    hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);

    ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
    ps_merge_prms->i4_num_pred_dir_actual = 0;

    if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
    {
        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
        S32 i4_num_valid_parts;
        S32 i4_sigma_array_offset;

        i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);

        /*********************************************************************************************************************************************/
        /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
        /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
        /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
        /*********************************************************************************************************************************************/
        i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
                                (ps_merge_prms->ps_results_merge->u1_y_off * 4);

        for(i = 0; i < i4_num_valid_parts; i++)
        {
            S32 i4_part_id = ai4_valid_part_ids[i];

            hme_compute_final_sigma_of_pu_from_base_blocks(
                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
                au8_final_src_sigmaX,
                au8_final_src_sigmaXSquared,
                (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
                4,
                i4_part_id,
                16);
        }

        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
    }

    /*************************************************************************/
    /* Loop through all ref idx and pick the merge candts and refine based   */
    /* on the active partitions. At this stage num ref will be 1 or 2        */
    /*************************************************************************/
    for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
    {
        S32 i4_cands;
        U08 u1_pred_dir = 0;

        /* Map the search iteration to a prediction direction (L0/L1) */
        if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
        {
            u1_pred_dir = i4_search_idx;
        }
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
        {
            u1_pred_dir = 1;
        }
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
        {
            u1_pred_dir = 0;
        }
        else
        {
            ASSERT(0);
        }

        /* call the function to pick and evaluate the merge candts, given */
        /* a ref id and a part mask. */
        i4_cands = hme_pick_eval_merge_candts(
            ps_merge_prms,
            ps_subpel_prms,
            u1_pred_dir,
            i4_best_part_type,
            is_vert,
            ps_wt_inp_prms,
            i4_frm_qstep,
            ps_cmn_utils_optimised_function_list,
            ps_me_optimised_function_list);

        /* Record which pred directions actually yielded candidates */
        if(i4_cands)
        {
            ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
                u1_pred_dir;
            ps_merge_prms->i4_num_pred_dir_actual++;
        }

        i4_num_merge_cands_evaluated += i4_cands;
    }

    /* Call the decide_part_types function here */
    /* Populate the new PU struct with the results post subpel refinement*/
    if(i4_num_merge_cands_evaluated)
    {
        inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;

        hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);

        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;

        hme_populate_pus(
            ps_thrd_ctxt,
            ps_ctxt,
            ps_subpel_prms,
            ps_results_merge,
            ps_cu_results,
            ps_pu_results,
            ps_pu_result,
            ps_merge_prms->ps_inter_ctb_prms,
            &ps_ctxt->s_wt_pred,
            ps_merge_prms->ps_layer_ctxt,
            ps_merge_prms->au1_pred_dir_searched,
            ps_merge_prms->i4_num_pred_dir_actual);

        /* Offset of the CU's top-left pel within the 64x64 CTB input */
        ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);

        hme_decide_part_types(
            ps_cu_results,
            ps_pu_results,
            ps_merge_prms->ps_inter_ctb_prms,
            ps_ctxt,
            ps_cmn_utils_optimised_function_list,
            ps_me_optimised_function_list

        );

        /*****************************************************************/
        /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
        /*****************************************************************/
#if DISABLE_INTRA_IN_BPICS
        if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
                 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
#endif
        {
            if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
            {
                hme_insert_intra_nodes_post_bipred(
                    ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
            }
        }
    }
    else
    {
        /* No merge candidates could be evaluated: keep the split */
        return CU_SPLIT;
    }

    /* We check the best result of ref idx 0 and compare for parent vs child */
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
       (CU_32x32 == ps_results_merge->e_cu_size))
    {
        i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
        /*********************************************************************/
        /* Add the cost of signaling the CU tree bits.                       */
        /* Assuming parent is not split, then we signal 1 bit for this parent*/
        /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
        /* So, 4*lambda is extra for children cost. :Lokesh                  */
        /*********************************************************************/
        {
            pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];

            i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
        }

        if(i4_cost_parent < i4_cost_children)
        {
            return CU_MERGED;
        }

        return CU_SPLIT;
    }
    else
    {
        return CU_MERGED;
    }
}
| |
/* Copies a search node's mv (downscaled by 'shift') and its ref idx into   */
/* the mv bank entries pointed to by ps_mv / pi1_ref_idx.                   */
/* Wrapped in do-while(0) so the macro expands to a single statement and    */
/* remains safe inside unbraced if/else constructs (CERT PRE10-C).          */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
| |
| /** |
| ******************************************************************************** |
| * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results, |
| * layer_mv_t *ps_layer_mv, |
| * S32 i4_search_blk_x, |
| * S32 i4_search_blk_y, |
| * mvbank_update_prms_t *ps_prms) |
| * |
*  @brief  Updates the mv bank in case there is no further encoding to be done
| * |
| * @param[in] ps_search_results: contains results for the block just searched |
| * |
| * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things |
| * |
| * @param[in] i4_search_blk_x : col num of blk being searched |
| * |
| * @param[in] i4_search_blk_y : row num of blk being searched |
| * |
*  @param[in] ps_prms : contains certain parameters which govern how the update is done
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| |
| void hme_update_mv_bank_noencode( |
| search_results_t *ps_search_results, |
| layer_mv_t *ps_layer_mv, |
| S32 i4_search_blk_x, |
| S32 i4_search_blk_y, |
| mvbank_update_prms_t *ps_prms) |
| { |
| hme_mv_t *ps_mv; |
| hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4; |
| S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4; |
| S32 i4_blk_x, i4_blk_y, i4_offset; |
| S32 i4_j, i4_ref_id; |
| search_node_t *ps_search_node; |
| search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1; |
| search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3; |
| search_node_t *ps_search_node_4x4_4; |
| |
| i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; |
| i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; |
| i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; |
| |
| i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; |
| |
| /* Identify the correct offset in the mvbank and the reference id buf */ |
| ps_mv = ps_layer_mv->ps_mv + i4_offset; |
| pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset; |
| |
| /*************************************************************************/ |
| /* Supposing we store the mvs in the same blk size as we searched (e.g. */ |
| /* we searched 8x8 blks and store results for 8x8 blks), then we can */ |
| /* do a straightforward single update of results. This will have a 1-1 */ |
| /* correspondence. */ |
| /*************************************************************************/ |
| if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size) |
| { |
| for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++) |
| { |
| ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; |
| for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) |
| { |
| COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0); |
| ps_mv++; |
| pi1_ref_idx++; |
| ps_search_node++; |
| } |
| } |
| return; |
| } |
| |
| /*************************************************************************/ |
| /* Case where search blk size is 8x8, but we update 4x4 results. In this */ |
| /* case, we need to have NxN partitions enabled in search. */ |
| /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */ |
| /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/ |
| /*************************************************************************/ |
| ASSERT(ps_layer_mv->e_blk_size == BLK_4x4); |
| ASSERT(ps_prms->e_search_blk_size == BLK_8x8); |
| ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN)); |
| |
| /*************************************************************************/ |
| /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */ |
| /* hence the below check. */ |
| /*************************************************************************/ |
| ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1); |
| |
| ps_mv1 = ps_mv; |
| ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk; |
| ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row); |
| ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk); |
| pi1_ref_idx1 = pi1_ref_idx; |
| pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk; |
| pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row); |
| pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk); |
| |
| for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++) |
| { |
| ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; |
| |
| ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL]; |
| |
| ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR]; |
| |
| ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL]; |
| |
| ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR]; |
| |
| COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0); |
| ps_mv1++; |
| pi1_ref_idx1++; |
| ps_search_node_4x4_1++; |
| COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0); |
| ps_mv2++; |
| pi1_ref_idx2++; |
| ps_search_node_4x4_2++; |
| COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0); |
| ps_mv3++; |
| pi1_ref_idx3++; |
| ps_search_node_4x4_3++; |
| COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0); |
| ps_mv4++; |
| pi1_ref_idx4++; |
| ps_search_node_4x4_4++; |
| |
| if(ps_layer_mv->i4_num_mvs_per_ref > 1) |
| { |
| COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0); |
| ps_mv1++; |
| pi1_ref_idx1++; |
| COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0); |
| ps_mv2++; |
| pi1_ref_idx2++; |
| COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0); |
| ps_mv3++; |
| pi1_ref_idx3++; |
| COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0); |
| ps_mv4++; |
| pi1_ref_idx4++; |
| } |
| |
| for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) |
| { |
| COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0); |
| ps_mv1++; |
| pi1_ref_idx1++; |
| ps_search_node_4x4_1++; |
| COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0); |
| ps_mv2++; |
| pi1_ref_idx2++; |
| ps_search_node_4x4_2++; |
| COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0); |
| ps_mv3++; |
| pi1_ref_idx3++; |
| ps_search_node_4x4_3++; |
| COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0); |
| ps_mv4++; |
| pi1_ref_idx4++; |
| ps_search_node_4x4_4++; |
| } |
| } |
| } |
| |
/**
********************************************************************************
*  @fn     hme_update_mv_bank_encode()
*
*  @brief  Updates the mv bank in the encode layer: the best results of a
*          16x16 searched blk are distributed into the four constituent 8x8
*          mv bank entries according to the CU's best partition type.
*
*  @param[in]     ps_search_results : results for the blk just searched
*  @param[in,out] ps_layer_mv : has pointer to mv bank amongst other things
*  @param[in]     i4_search_blk_x : col num of blk being searched
*  @param[in]     i4_search_blk_y : row num of blk being searched
*  @param[in]     ps_prms : parameters governing how the update is done
*  @param[in]     pu1_pred_dir_searched : maps result index to pred dir (L0/L1)
*  @param[in]     i4_num_act_ref_l0 : NOTE(review): unused in this function
*
*  @return None
********************************************************************************
*/
void hme_update_mv_bank_encode(
    search_results_t *ps_search_results,
    layer_mv_t *ps_layer_mv,
    S32 i4_search_blk_x,
    S32 i4_search_blk_y,
    mvbank_update_prms_t *ps_prms,
    U08 *pu1_pred_dir_searched,
    S32 i4_num_act_ref_l0)
{
    hme_mv_t *ps_mv;
    /* ps_mv1..4 / pi1_ref_idx1..4 : bank write positions of the TL, TR, BL */
    /* and BR 8x8 blks of the 16x16 search blk                              */
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
    S32 i4_blk_x, i4_blk_y, i4_offset;
    S32 j, i, num_parts;
    search_node_t *ps_search_node_tl, *ps_search_node_tr;
    search_node_t *ps_search_node_bl, *ps_search_node_br;
    search_node_t s_zero_mv;
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;

    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;

    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;

    /* Identify the correct offset in the mvbank and the reference id buf */
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;

    /* This routine only handles the 16x16-search / 8x8-bank combination */
    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);

    /*************************************************************************/
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
    /* hence the below check.                                                */
    /*************************************************************************/
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);

    ps_mv1 = ps_mv;
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
    pi1_ref_idx1 = pi1_ref_idx;
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);

    /* Initialize zero mv: default mv used for intra mvs */
    /* NOTE(review): s_zero_mv is initialized but never referenced below;   */
    /* intra results are instead skipped by advancing to the next node      */
    s_zero_mv.s_mv.i2_mvx = 0;
    s_zero_mv.s_mv.i2_mvy = 0;
    s_zero_mv.i1_ref_idx = 0;

    /* If the 16x16 CU is split and NxN was searched, store the NxN (8x8)   */
    /* partition results rather than the CU's best (merged) partition type  */
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
       (ps_search_results->i4_part_mask & ENABLE_NxN))
    {
        i4_part_type = PRT_NxN;
    }

    for(i = 0; i < ps_prms->i4_num_ref; i++)
    {
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
        {
            WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];

            num_parts = gau1_num_parts_in_part_type[i4_part_type];

            ps_search_node_tl =
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];

            /* Map the partition results onto the four 8x8 quadrants based  */
            /* on how many parts the best part type has (1, 2 or 4)         */
            if(num_parts == 1)
            {
                ps_search_node_tr = ps_search_node_tl;
                ps_search_node_bl = ps_search_node_tl;
                ps_search_node_br = ps_search_node_tl;
            }
            else if(num_parts == 2)
            {
                /* For vertically oriented partitions, tl, bl pt to same result */
                /* For horizontally oriented partition, tl, tr pt to same result */
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
                /* and right 2 8x8 have 12x16R partition */
                if(gau1_is_vert_part[i4_part_type])
                {
                    ps_search_node_tr =
                        ps_search_results
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
                    ps_search_node_bl = ps_search_node_tl;
                }
                else
                {
                    ps_search_node_tr = ps_search_node_tl;
                    ps_search_node_bl =
                        ps_search_results
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
                }
                ps_search_node_br =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
            }
            else
            {
                /* 4 unique results */
                ps_search_node_tr =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
                ps_search_node_bl =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
                ps_search_node_br =
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
            }

            /* Skip intra results (mv marked INTRA_MV): fall back to the    */
            /* next best result of the same partition                       */
            if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_tl++;
            if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_tr++;
            if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_bl++;
            if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
                ps_search_node_br++;

            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
            ps_mv1++;
            pi1_ref_idx1++;
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
            ps_mv2++;
            pi1_ref_idx2++;
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
            ps_mv3++;
            pi1_ref_idx3++;
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
            ps_mv4++;
            pi1_ref_idx4++;

            /* Optionally store the 2nd best result ([1]) with same mapping */
            if(ps_prms->i4_num_results_to_store > 1)
            {
                ps_search_node_tl =
                    &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];

                if(num_parts == 1)
                {
                    ps_search_node_tr = ps_search_node_tl;
                    ps_search_node_bl = ps_search_node_tl;
                    ps_search_node_br = ps_search_node_tl;
                }
                else if(num_parts == 2)
                {
                    /* For vertically oriented partitions, tl, bl pt to same result */
                    /* For horizontally oriented partition, tl, tr pt to same result */
                    /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
                    /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
                    /* and right 2 8x8 have 12x16R partition */
                    if(gau1_is_vert_part[i4_part_type])
                    {
                        ps_search_node_tr =
                            &ps_search_results
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                        ps_search_node_bl = ps_search_node_tl;
                    }
                    else
                    {
                        ps_search_node_tr = ps_search_node_tl;
                        ps_search_node_bl =
                            &ps_search_results
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                    }
                    ps_search_node_br =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                }
                else
                {
                    /* 4 unique results */
                    ps_search_node_tr =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
                    ps_search_node_bl =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
                    ps_search_node_br =
                        &ps_search_results
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
                }

                if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_tl++;
                if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_tr++;
                if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_bl++;
                if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
                    ps_search_node_br++;

                COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
                ps_mv1++;
                pi1_ref_idx1++;
                COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
                ps_mv2++;
                pi1_ref_idx2++;
                COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
                ps_mv3++;
                pi1_ref_idx3++;
                COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
                ps_mv4++;
                pi1_ref_idx4++;
            }
        }
    }
}
| |
| /** |
| ******************************************************************************** |
| * @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results, |
| * layer_mv_t *ps_layer_mv, |
| * S32 i4_search_blk_x, |
| * S32 i4_search_blk_y, |
| * mvbank_update_prms_t *ps_prms) |
| * |
| * @brief Updates the mv bank in case there is no further encodign to be done |
| * |
| * @param[in] ps_search_results: contains results for the block just searched |
| * |
| * @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things |
| * |
| * @param[in] i4_search_blk_x : col num of blk being searched |
| * |
| * @param[in] i4_search_blk_y : row num of blk being searched |
| * |
| * @param[in] ps_prms : contains certain parameters which govern how updatedone |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| |
| void hme_update_mv_bank_in_l1_me( |
| search_results_t *ps_search_results, |
| layer_mv_t *ps_layer_mv, |
| S32 i4_search_blk_x, |
| S32 i4_search_blk_y, |
| mvbank_update_prms_t *ps_prms) |
| { |
| hme_mv_t *ps_mv; |
| hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4; |
| S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4; |
| S32 i4_blk_x, i4_blk_y, i4_offset; |
| S32 i4_j, i4_ref_id; |
| search_node_t *ps_search_node; |
| search_node_t *ps_search_node_8x8, *ps_search_node_4x4; |
| |
| i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; |
| i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; |
| i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; |
| |
| i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; |
| |
| /* Identify the correct offset in the mvbank and the reference id buf */ |
| ps_mv = ps_layer_mv->ps_mv + i4_offset; |
| pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset; |
| |
| /*************************************************************************/ |
| /* Supposing we store the mvs in the same blk size as we searched (e.g. */ |
| /* we searched 8x8 blks and store results for 8x8 blks), then we can */ |
| /* do a straightforward single update of results. This will have a 1-1 */ |
| /* correspondence. */ |
| /*************************************************************************/ |
| if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size) |
| { |
| search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2]; |
| |
| hme_mv_t *ps_mv_l0_root = ps_mv; |
| hme_mv_t *ps_mv_l1_root = |
| ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| |
| U32 u4_num_l0_results_updated = 0; |
| U32 u4_num_l1_results_updated = 0; |
| |
| S08 *pi1_ref_idx_l0_root = pi1_ref_idx; |
| S08 *pi1_ref_idx_l1_root = |
| pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| |
| for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++) |
| { |
| U32 *pu4_num_results_updated; |
| search_node_t **pps_result_nodes; |
| |
| U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id]; |
| |
| if(u1_pred_dir_of_cur_ref) |
| { |
| pu4_num_results_updated = &u4_num_l1_results_updated; |
| pps_result_nodes = &aps_result_nodes_sorted[1][0]; |
| } |
| else |
| { |
| pu4_num_results_updated = &u4_num_l0_results_updated; |
| pps_result_nodes = &aps_result_nodes_sorted[0][0]; |
| } |
| |
| ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; |
| |
| for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) |
| { |
| hme_add_new_node_to_a_sorted_array( |
| &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0); |
| |
| ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id); |
| (*pu4_num_results_updated)++; |
| } |
| } |
| |
| for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++) |
| { |
| COPY_SEARCH_RESULT( |
| &ps_mv_l0_root[i4_j], |
| &pi1_ref_idx_l0_root[i4_j], |
| aps_result_nodes_sorted[0][i4_j], |
| 0); |
| } |
| |
| for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++) |
| { |
| COPY_SEARCH_RESULT( |
| &ps_mv_l1_root[i4_j], |
| &pi1_ref_idx_l1_root[i4_j], |
| aps_result_nodes_sorted[1][i4_j], |
| 0); |
| } |
| |
| return; |
| } |
| |
| /*************************************************************************/ |
| /* Case where search blk size is 8x8, but we update 4x4 results. In this */ |
| /* case, we need to have NxN partitions enabled in search. */ |
| /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */ |
| /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/ |
| /*************************************************************************/ |
| ASSERT(ps_layer_mv->e_blk_size == BLK_4x4); |
| ASSERT(ps_prms->e_search_blk_size == BLK_8x8); |
| ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN)); |
| |
| /*************************************************************************/ |
| /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */ |
| /* hence the below check. */ |
| /*************************************************************************/ |
| ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1); |
| |
| ps_mv1 = ps_mv; |
| ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk; |
| ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row); |
| ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk); |
| pi1_ref_idx1 = pi1_ref_idx; |
| pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk; |
| pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row); |
| pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk); |
| |
| { |
| search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4]; |
| U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4]; |
| |
| S32 i; |
| |
| hme_mv_t *ps_mv1_l0_root = ps_mv1; |
| hme_mv_t *ps_mv1_l1_root = |
| ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| hme_mv_t *ps_mv2_l0_root = ps_mv2; |
| hme_mv_t *ps_mv2_l1_root = |
| ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| hme_mv_t *ps_mv3_l0_root = ps_mv3; |
| hme_mv_t *ps_mv3_l1_root = |
| ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| hme_mv_t *ps_mv4_l0_root = ps_mv4; |
| hme_mv_t *ps_mv4_l1_root = |
| ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| |
| U32 u4_num_l0_results_updated = 0; |
| U32 u4_num_l1_results_updated = 0; |
| |
| S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1; |
| S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 * |
| ps_layer_mv->i4_num_mvs_per_ref); |
| S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2; |
| S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 * |
| ps_layer_mv->i4_num_mvs_per_ref); |
| S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3; |
| S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 * |
| ps_layer_mv->i4_num_mvs_per_ref); |
| S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4; |
| S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 * |
| ps_layer_mv->i4_num_mvs_per_ref); |
| |
| for(i = 0; i < 4; i++) |
| { |
| hme_mv_t *ps_mv_l0_root; |
| hme_mv_t *ps_mv_l1_root; |
| |
| S08 *pi1_ref_idx_l0_root; |
| S08 *pi1_ref_idx_l1_root; |
| |
| for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++) |
| { |
| U32 *pu4_num_results_updated; |
| search_node_t **pps_result_nodes; |
| U08 *pu1_cost_shifts_for_sorted_node; |
| |
| U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id]; |
| |
| if(u1_pred_dir_of_cur_ref) |
| { |
| pu4_num_results_updated = &u4_num_l1_results_updated; |
| pps_result_nodes = &aps_result_nodes_sorted[1][0]; |
| pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0]; |
| } |
| else |
| { |
| pu4_num_results_updated = &u4_num_l0_results_updated; |
| pps_result_nodes = &aps_result_nodes_sorted[0][0]; |
| pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0]; |
| } |
| |
| ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; |
| |
| ps_search_node_4x4 = |
| ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i]; |
| |
| for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) |
| { |
| hme_add_new_node_to_a_sorted_array( |
| &ps_search_node_4x4[i4_j], |
| pps_result_nodes, |
| pu1_cost_shifts_for_sorted_node, |
| *pu4_num_results_updated, |
| 0); |
| |
| (*pu4_num_results_updated)++; |
| |
| hme_add_new_node_to_a_sorted_array( |
| &ps_search_node_8x8[i4_j], |
| pps_result_nodes, |
| pu1_cost_shifts_for_sorted_node, |
| *pu4_num_results_updated, |
| 2); |
| |
| (*pu4_num_results_updated)++; |
| } |
| } |
| |
| switch(i) |
| { |
| case 0: |
| { |
| ps_mv_l0_root = ps_mv1_l0_root; |
| ps_mv_l1_root = ps_mv1_l1_root; |
| |
| pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root; |
| pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root; |
| |
| break; |
| } |
| case 1: |
| { |
| ps_mv_l0_root = ps_mv2_l0_root; |
| ps_mv_l1_root = ps_mv2_l1_root; |
| |
| pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root; |
| pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root; |
| |
| break; |
| } |
| case 2: |
| { |
| ps_mv_l0_root = ps_mv3_l0_root; |
| ps_mv_l1_root = ps_mv3_l1_root; |
| |
| pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root; |
| pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root; |
| |
| break; |
| } |
| case 3: |
| { |
| ps_mv_l0_root = ps_mv4_l0_root; |
| ps_mv_l1_root = ps_mv4_l1_root; |
| |
| pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root; |
| pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root; |
| |
| break; |
| } |
| } |
| |
| u4_num_l0_results_updated = |
| MIN((S32)u4_num_l0_results_updated, |
| ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref); |
| |
| u4_num_l1_results_updated = |
| MIN((S32)u4_num_l1_results_updated, |
| ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref); |
| |
| for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++) |
| { |
| COPY_SEARCH_RESULT( |
| &ps_mv_l0_root[i4_j], |
| &pi1_ref_idx_l0_root[i4_j], |
| aps_result_nodes_sorted[0][i4_j], |
| 0); |
| } |
| |
| for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++) |
| { |
| COPY_SEARCH_RESULT( |
| &ps_mv_l1_root[i4_j], |
| &pi1_ref_idx_l1_root[i4_j], |
| aps_result_nodes_sorted[1][i4_j], |
| 0); |
| } |
| } |
| } |
| } |
| |
| /** |
| ****************************************************************************** |
| * @brief Scales motion vector component projecte from a diff layer in same |
| * picture (so no ref id related delta poc scaling required) |
| ****************************************************************************** |
| */ |
| |
/* Rescales an mv component from the coarse dimension dim_p to the current   */
/* dimension dim_c with round-to-nearest: a half-divisor of the sign of      */
/* mvcomp_p is added before the integer division so truncation rounds        */
/* symmetrically instead of toward zero.                                     */
#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
| /** |
| ******************************************************************************** |
| * @fn hme_project_coloc_candt(search_node_t *ps_search_node, |
| * layer_ctxt_t *ps_curr_layer, |
| * layer_ctxt_t *ps_coarse_layer, |
| * S32 i4_pos_x, |
| * S32 i4_pos_y, |
| * S08 i1_ref_id, |
| * S08 i1_result_id) |
| * |
* @brief From a coarser layer, projects a candidate situated at "colocated"
| * position in the picture (e.g. given x, y it will be x/2, y/2 dyadic |
| * |
| * @param[out] ps_search_node : contains the projected result |
| * |
| * @param[in] ps_curr_layer : current layer context |
| * |
| * @param[in] ps_coarse_layer : coarser layer context |
| * |
| * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer) |
| * |
| * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer) |
| * |
| * @param[in] i1_ref_id : reference id for which the candidate required |
| * |
| * @param[in] i4_result_id : result id for which the candidate required |
| * (0 : best result, 1 : next best) |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| |
| void hme_project_coloc_candt( |
| search_node_t *ps_search_node, |
| layer_ctxt_t *ps_curr_layer, |
| layer_ctxt_t *ps_coarse_layer, |
| S32 i4_pos_x, |
| S32 i4_pos_y, |
| S08 i1_ref_id, |
| S32 i4_result_id) |
| { |
| S32 wd_c, ht_c, wd_p, ht_p; |
| S32 blksize_p, blk_x, blk_y, i4_offset; |
| layer_mv_t *ps_layer_mvbank; |
| hme_mv_t *ps_mv; |
| S08 *pi1_ref_idx; |
| |
| /* Width and ht of current and prev layers */ |
| wd_c = ps_curr_layer->i4_wd; |
| ht_c = ps_curr_layer->i4_ht; |
| wd_p = ps_coarse_layer->i4_wd; |
| ht_p = ps_coarse_layer->i4_ht; |
| |
| ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; |
| blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; |
| |
| /* Safety check to avoid uninitialized access across temporal layers */ |
| i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p)); |
| i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p)); |
| |
| /* Project the positions to prev layer */ |
| /* TODO: convert these to scale factors at pic level */ |
| blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p); |
| blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p); |
| |
| /* Pick up the mvs from the location */ |
| i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); |
| i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); |
| |
| ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
| pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
| |
| ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); |
| pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref); |
| |
| ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p); |
| ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p); |
| ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id]; |
| ps_search_node->u1_subpel_done = 0; |
| if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV)) |
| { |
| ps_search_node->i1_ref_idx = i1_ref_id; |
| ps_search_node->s_mv.i2_mvx = 0; |
| ps_search_node->s_mv.i2_mvy = 0; |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node, |
| * layer_ctxt_t *ps_curr_layer, |
| * layer_ctxt_t *ps_coarse_layer, |
| * S32 i4_pos_x, |
| * S32 i4_pos_y, |
| * S08 i1_ref_id, |
| * S08 i1_result_id) |
| * |
* @brief From a coarser layer, projects a candidate situated at "colocated"
| * position in the picture when the ratios are dyadic |
| * |
| * @param[out] ps_search_node : contains the projected result |
| * |
| * @param[in] ps_curr_layer : current layer context |
| * |
| * @param[in] ps_coarse_layer : coarser layer context |
| * |
| * @param[in] i4_pos_x : x Position where mv is required (w.r.t. curr layer) |
| * |
| * @param[in] i4_pos_y : y Position where mv is required (w.r.t. curr layer) |
| * |
| * @param[in] i1_ref_id : reference id for which the candidate required |
| * |
| * @param[in] i4_result_id : result id for which the candidate required |
| * (0 : best result, 1 : next best) |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| |
void hme_project_coloc_candt_dyadic(
    search_node_t *ps_search_node,
    layer_ctxt_t *ps_curr_layer,
    layer_ctxt_t *ps_coarse_layer,
    S32 i4_pos_x,
    S32 i4_pos_y,
    S08 i1_ref_id,
    S32 i4_result_id)
{
    S32 wd_c, ht_c, wd_p, ht_p;
    S32 blksize_p, blk_x, blk_y, i4_offset;
    layer_mv_t *ps_layer_mvbank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_idx;

    /* Width and ht of current and prev layers */
    /* (wd_p/ht_p are read but not used further here: with dyadic ratios the */
    /*  mv scaling below degenerates to a plain << 1)                        */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;
    wd_p = ps_coarse_layer->i4_wd;
    ht_p = ps_coarse_layer->i4_ht;

    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    /* blksize_p = log2(wd) + 1 */
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Safety check to avoid uninitialized access across temporal layers */
    /* NOTE(review): the clip margin here is the shift amount (3..5 pels),   */
    /* whereas hme_project_coloc_candt clips by the full block width in pels */
    /* — confirm this asymmetry is intended                                  */
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));

    /* Project the positions to prev layer: one >> for the 2:1 layer ratio   */
    /* combined with the blk-width shift                                     */
    /* TODO: convert these to scale factors at pic level */
    blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p);
    blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p);

    /* Pick up the mvs from the location */
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);

    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;

    /* Seek to the section of this blk's entries belonging to i1_ref_id */
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);

    /* Dyadic ratio: scaling the mv to this layer is a doubling */
    /* NOTE(review): unlike hme_project_coloc_candt, u1_subpel_done is not   */
    /* reset here — confirm callers initialise it                            */
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
    /* Invalid or intra colocated entry: fall back to zero mv on i1_ref_id */
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
    {
        ps_search_node->i1_ref_idx = i1_ref_id;
        ps_search_node->s_mv.i2_mvx = 0;
        ps_search_node->s_mv.i2_mvy = 0;
    }
}
| |
| void hme_project_coloc_candt_dyadic_implicit( |
| search_node_t *ps_search_node, |
| layer_ctxt_t *ps_curr_layer, |
| layer_ctxt_t *ps_coarse_layer, |
| S32 i4_pos_x, |
| S32 i4_pos_y, |
| S32 i4_num_act_ref_l0, |
| U08 u1_pred_dir, |
| U08 u1_default_ref_id, |
| S32 i4_result_id) |
| { |
| S32 wd_c, ht_c, wd_p, ht_p; |
| S32 blksize_p, blk_x, blk_y, i4_offset; |
| layer_mv_t *ps_layer_mvbank; |
| hme_mv_t *ps_mv; |
| S08 *pi1_ref_idx; |
| |
| /* Width and ht of current and prev layers */ |
| wd_c = ps_curr_layer->i4_wd; |
| ht_c = ps_curr_layer->i4_ht; |
| wd_p = ps_coarse_layer->i4_wd; |
| ht_p = ps_coarse_layer->i4_ht; |
| |
| ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; |
| blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size]; |
| |
| /* ASSERT for valid sizes */ |
| ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5)); |
| |
| /* Safety check to avoid uninitialized access across temporal layers */ |
| i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p)); |
| i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p)); |
| /* Project the positions to prev layer */ |
| /* TODO: convert these to scale factors at pic level */ |
| blk_x = i4_pos_x >> blksize_p; // (2 * blksize_p); |
| blk_y = i4_pos_y >> blksize_p; // (2 * blksize_p); |
| |
| /* Pick up the mvs from the location */ |
| i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); |
| i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); |
| |
| ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
| pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
| |
| if(u1_pred_dir == 1) |
| { |
| ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref); |
| pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref); |
| } |
| |
| ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1; |
| ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1; |
| ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id]; |
| if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV)) |
| { |
| ps_search_node->i1_ref_idx = u1_default_ref_id; |
| ps_search_node->s_mv.i2_mvx = 0; |
| ps_search_node->s_mv.i2_mvy = 0; |
| } |
| } |
| |
/* Scales the four range bounds of prm2 by << shift into prm1.               */
/* Wrapped in do { } while(0) so a call followed by ';' behaves as a single  */
/* statement in if/else bodies; arguments are parenthesized against          */
/* operator-precedence surprises (CERT PRE10-C / PRE01-C).                   */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    do                                                                                             \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x << (shift);                                              \
        (prm1).i2_max_x = (prm2).i2_max_x << (shift);                                              \
        (prm1).i2_min_y = (prm2).i2_min_y << (shift);                                              \
        (prm1).i2_max_y = (prm2).i2_max_y << (shift);                                              \
    } while(0)

/* Same as SCALE_RANGE_PRMS, but prm1/prm2 are pointers to the range structs */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    do                                                                                             \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x << (shift);                                            \
        (prm1)->i2_max_x = (prm2)->i2_max_x << (shift);                                            \
        (prm1)->i2_min_y = (prm2)->i2_min_y << (shift);                                            \
        (prm1)->i2_max_y = (prm2)->i2_max_y << (shift);                                            \
    } while(0)
| |
| /** |
| ******************************************************************************** |
* @fn void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
*                              refine_prms_t *ps_refine_prms,
*                              layer_ctxt_t *ps_coarse_layer)
*
* @brief Frame init of refinement layers in ME
*
* @param[in,out] ps_curr_layer: layer context whose MV bank is initialised
*
* @param[in] ps_refine_prms : refinement layer prms
*
* @param[in] ps_coarse_layer : coarser layer context (source of the ref count)
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_refine_frm_init( |
| layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer) |
| { |
| /* local variables */ |
| BLK_SIZE_T e_result_blk_size = BLK_8x8; |
| S32 i4_num_ref_fpel, i4_num_ref_prev_layer; |
| |
| i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; |
| |
| if(ps_refine_prms->explicit_ref) |
| { |
| i4_num_ref_fpel = i4_num_ref_prev_layer; |
| } |
| else |
| { |
| i4_num_ref_fpel = 2; |
| } |
| |
| if(ps_refine_prms->i4_enable_4x4_part) |
| { |
| e_result_blk_size = BLK_4x4; |
| } |
| |
| i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer); |
| |
| hme_init_mv_bank( |
| ps_curr_layer, |
| e_result_blk_size, |
| i4_num_ref_fpel, |
| ps_refine_prms->i4_num_mvbank_results, |
| ps_refine_prms->i4_layer_id > 0 ? 0 : 1); |
| } |
| |
| #if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION |
| /** |
| ******************************************************************************** |
| * @fn void hme_init_clusters_16x16 |
| * ( |
| * cluster_16x16_blk_t *ps_cluster_blk_16x16 |
| * ) |
| * |
* @brief Initialisations for the structs used in the clustering algorithm
| * |
| * @param[in/out] ps_cluster_blk_16x16: pointer to structure containing clusters |
| * of 16x16 block |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| static __inline void |
| hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled) |
| { |
| S32 i; |
| |
| ps_cluster_blk_16x16->num_clusters = 0; |
| ps_cluster_blk_16x16->intra_mv_area = 0; |
| ps_cluster_blk_16x16->best_inter_cost = 0; |
| |
| for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++) |
| { |
| ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid = |
| bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16; |
| |
| ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0; |
| |
| ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0; |
| ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0; |
| } |
| for(i = 0; i < MAX_NUM_REF; i++) |
| { |
| ps_cluster_blk_16x16->au1_num_clusters[i] = 0; |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_init_clusters_32x32 |
| * ( |
| * cluster_32x32_blk_t *ps_cluster_blk_32x32 |
| * ) |
| * |
* @brief Initialisations for the structs used in the clustering algorithm
| * |
| * @param[in/out] ps_cluster_blk_32x32: pointer to structure containing clusters |
| * of 32x32 block |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| static __inline void |
| hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled) |
| { |
| S32 i; |
| |
| ps_cluster_blk_32x32->num_clusters = 0; |
| ps_cluster_blk_32x32->intra_mv_area = 0; |
| ps_cluster_blk_32x32->best_alt_ref = -1; |
| ps_cluster_blk_32x32->best_uni_ref = -1; |
| ps_cluster_blk_32x32->best_inter_cost = 0; |
| ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0; |
| |
| for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++) |
| { |
| ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid = |
| bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32; |
| ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0; |
| |
| ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0; |
| ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0; |
| } |
| for(i = 0; i < MAX_NUM_REF; i++) |
| { |
| ps_cluster_blk_32x32->au1_num_clusters[i] = 0; |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_init_clusters_64x64 |
| * ( |
| * cluster_64x64_blk_t *ps_cluster_blk_64x64 |
| * ) |
| * |
* @brief Initialisations for the structs used in the clustering algorithm
| * |
| * @param[in/out] ps_cluster_blk_64x64: pointer to structure containing clusters |
| * of 64x64 block |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| static __inline void |
| hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled) |
| { |
| S32 i; |
| |
| ps_cluster_blk_64x64->num_clusters = 0; |
| ps_cluster_blk_64x64->intra_mv_area = 0; |
| ps_cluster_blk_64x64->best_alt_ref = -1; |
| ps_cluster_blk_64x64->best_uni_ref = -1; |
| ps_cluster_blk_64x64->best_inter_cost = 0; |
| |
| for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++) |
| { |
| ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid = |
| bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64; |
| ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0; |
| |
| ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0; |
| ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0; |
| } |
| for(i = 0; i < MAX_NUM_REF; i++) |
| { |
| ps_cluster_blk_64x64->au1_num_clusters[i] = 0; |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_sort_and_assign_top_ref_ids_areawise |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info |
| * ) |
| * |
| * @brief Finds best_uni_ref and best_alt_ref |
| * |
| * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data |
| * |
| * @param[in] bidir_enabled: flag that indicates whether or not bi-pred is |
| * enabled |
| * |
| * @param[in] block_width: width of the block in pels |
| * |
| * @param[in] e_cu_pos: position of the block within the CTB |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
void hme_sort_and_assign_top_ref_ids_areawise(
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
{
    cluster_32x32_blk_t *ps_32x32 = NULL;
    cluster_64x64_blk_t *ps_64x64 = NULL;
    cluster_data_t *ps_data;

    S32 j, k;

    /* Per-ref accumulated pixel areas (uni / bi) and bookkeeping of which */
    /* refs occur across the valid clusters of this block */
    S32 ai4_uni_area[MAX_NUM_REF];
    S32 ai4_bi_area[MAX_NUM_REF];
    S32 ai4_ref_id_found[MAX_NUM_REF];
    S32 ai4_ref_id[MAX_NUM_REF];

    S32 best_uni_ref = -1, best_alt_ref = -1;
    S32 num_clusters;
    S32 num_ref = 0;
    S32 num_clusters_evaluated = 0;
    S32 is_cur_blk_valid;

    /* Pick the addressed 32x32 sub-block, or the single 64x64 block */
    if(32 == block_width)
    {
        /* ('|| 0' is a no-op: && already yields 0/1) */
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
        num_clusters = ps_32x32->num_clusters;
        ps_data = &ps_32x32->as_cluster_data[0];
    }
    else
    {
        /* 64x64 is valid only when all four 32x32 sub-blocks are valid */
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
        num_clusters = ps_64x64->num_clusters;
        ps_data = &ps_64x64->as_cluster_data[0];
    }

#if !ENABLE_4CTB_EVALUATION
    if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
    {
        return;
    }
#endif
    /* Nothing to rank for empty or invalid blocks */
    if(num_clusters == 0)
    {
        return;
    }
    else if(!is_cur_blk_valid)
    {
        return;
    }

    memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
    memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
    memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
    memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);

    /* Accumulate per-ref areas over the valid clusters; the loop stops once */
    /* num_clusters valid entries have been seen (j only paces ps_data)      */
    for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
    {
        S32 ref_id;

        if(!ps_data->is_valid_cluster)
        {
            continue;
        }

        ref_id = ps_data->ref_id;

        num_clusters_evaluated++;

        ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
        ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;

        /* Count each distinct ref id once */
        if(!ai4_ref_id_found[ref_id])
        {
            ai4_ref_id[ref_id] = ref_id;
            ai4_ref_id_found[ref_id] = 1;
            num_ref++;
        }
    }

    /* Selection pass on a copy: move the largest uni area (and its ref) to  */
    /* index 0; the rest of the array is left partially shuffled             */
    {
        S32 ai4_ref_id_temp[MAX_NUM_REF];

        memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);

        for(k = 1; k < MAX_NUM_REF; k++)
        {
            if(ai4_uni_area[k] > ai4_uni_area[0])
            {
                SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
                SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
            }
        }

        best_uni_ref = ai4_ref_id_temp[0];
    }

    if(bidir_enabled)
    {
        /* Same selection pass on the bi areas, in-place this time */
        for(k = 1; k < MAX_NUM_REF; k++)
        {
            if(ai4_bi_area[k] > ai4_bi_area[0])
            {
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
            }
        }

        /* No bi area at all: there is no alternate ref */
        if(!ai4_bi_area[0])
        {
            best_alt_ref = -1;

            if(32 == block_width)
            {
                SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
            }
            else
            {
                SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
            }

            return;
        }

        /* If the bi winner coincides with the uni winner, take the runner-up */
        /* (second selection pass into index 1) as the alternate ref          */
        if(best_uni_ref == ai4_ref_id[0])
        {
            for(k = 2; k < MAX_NUM_REF; k++)
            {
                if(ai4_bi_area[k] > ai4_bi_area[1])
                {
                    SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
                    SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
                }
            }

            best_alt_ref = ai4_ref_id[1];
        }
        else
        {
            best_alt_ref = ai4_ref_id[0];
        }
    }

    /* Publish the winners into the block's cluster struct */
    if(32 == block_width)
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
    }
    else
    {
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
    }
}
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_find_top_ref_ids |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info |
| * ) |
| * |
| * @brief Finds best_uni_ref and best_alt_ref |
| * |
| * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_find_top_ref_ids( |
| ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width) |
| { |
| S32 i; |
| |
| if(32 == block_width) |
| { |
| for(i = 0; i < 4; i++) |
| { |
| hme_sort_and_assign_top_ref_ids_areawise( |
| ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i); |
| } |
| } |
| else if(64 == block_width) |
| { |
| hme_sort_and_assign_top_ref_ids_areawise( |
| ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA); |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_boot_out_outlier |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info |
| * ) |
| * |
| * @brief Removes outlier clusters before CU tree population |
| * |
| * @param[in/out] ps_ctb_cluster_info: structure that points to ctb data |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width) |
| { |
| cluster_32x32_blk_t *ps_32x32; |
| |
| S32 i; |
| |
| cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0]; |
| |
| S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold; |
| |
| if(32 == blk_width) |
| { |
| /* 32x32 clusters */ |
| for(i = 0; i < 4; i++) |
| { |
| ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i]; |
| |
| if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX) |
| { |
| BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold); |
| } |
| } |
| } |
| else if(64 == blk_width) |
| { |
| /* 64x64 clusters */ |
| if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX) |
| { |
| BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold); |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_update_cluster_attributes |
| * ( |
| * cluster_data_t *ps_cluster_data, |
| * S32 mvx, |
| * S32 mvy, |
| * PART_ID_T e_part_id |
| * ) |
| * |
* @brief Implementation of the clustering algorithm
| * |
| * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct |
| * |
| * @param[in] mvx : x co-ordinate of the motion vector |
| * |
| * @param[in] mvy : y co-ordinate of the motion vector |
| * |
| * @param[in] ref_idx : ref_id of the motion vector |
| * |
| * @param[in] e_part_id : partition id of the motion vector |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
static __inline void hme_update_cluster_attributes(
    cluster_data_t *ps_cluster_data,
    S32 mvx,
    S32 mvy,
    S32 mvdx,
    S32 mvdy,
    S32 ref_id, /* currently unused in this function */
    S32 sdi,
    U08 is_part_of_bi,
    PART_ID_T e_part_id)
{
    LWORD64 i8_mvx_sum_q8;
    LWORD64 i8_mvy_sum_q8;

    /* Current centroid in Q8 fixed point */
    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

    /* Expand the cluster's mv bounding box; mvdx/mvdy are presumably the    */
    /* deltas of (mvx, mvy) from the centroid — TODO confirm with callers.   */
    /* NOTE(review): min is widened only when the delta is positive and max  */
    /* only when it is negative — verify this pairing is intended            */
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
    {
        ps_cluster_data->min_x = mvx;
    }
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
    {
        ps_cluster_data->max_x = mvx;
    }

    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
    {
        ps_cluster_data->min_y = mvy;
    }
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
    {
        ps_cluster_data->max_y = mvy;
    }

    /* Append the mv (with its pixel weight, direction and sdi) to the list */
    {
        S32 num_mvs = ps_cluster_data->num_mvs;

        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;

        /***************************/
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
        /**************************/
    }

    /* Updation of centroid */
    /* Incremental Q8 mean: sum = old_mean * old_count + new_sample          */
    /* NOTE(review): (mvx << 8) left-shifts a possibly negative value, which */
    /* is formally undefined in C — consider (mvx * 256) if this is hit by   */
    /* a strict sanitizer                                                    */
    {
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);

        ps_cluster_data->num_mvs++;

        ps_cluster_data->s_centroid.i4_pos_x_q8 =
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
    }

    /* Accumulate the pixel area, split by prediction direction */
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];

    if(is_part_of_bi)
    {
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
    }
    else
    {
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
    }
}
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_try_cluster_merge |
| * ( |
| * cluster_data_t *ps_cluster_data, |
| * S32 *pi4_num_clusters, |
| * S32 idx_of_updated_cluster |
| * ) |
| * |
| * @brief Implementation fo the clustering algorithm |
| * |
| * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct |
| * |
| * @param[in/out] pi4_num_clusters : pointer to number of clusters |
| * |
| * @param[in] idx_of_updated_cluster : index of the cluster most recently |
| * updated |
| * |
| * @return Nothing |
| ******************************************************************************** |
| */ |
| void hme_try_cluster_merge( |
| cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster) |
| { |
| centroid_t *ps_centroid; |
| |
| S32 cur_pos_x_q8; |
| S32 cur_pos_y_q8; |
| S32 i; |
| S32 max_dist_from_centroid; |
| S32 mvd; |
| S32 mvdx_q8; |
| S32 mvdx; |
| S32 mvdy_q8; |
| S32 mvdy; |
| S32 num_clusters, num_clusters_evaluated; |
| S32 other_pos_x_q8; |
| S32 other_pos_y_q8; |
| |
| cluster_data_t *ps_root = ps_cluster_data; |
| cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster]; |
| centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid; |
| |
| /* Merge is superfluous if num_clusters is 1 */ |
| if(*pu1_num_clusters == 1) |
| { |
| return; |
| } |
| |
| cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8; |
| cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8; |
| |
| max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid; |
| |
| num_clusters = *pu1_num_clusters; |
| num_clusters_evaluated = 0; |
| |
| for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++) |
| { |
| if(!ps_cluster_data->is_valid_cluster) |
| { |
| continue; |
| } |
| if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster)) |
| { |
| num_clusters_evaluated++; |
| continue; |
| } |
| |
| ps_centroid = &ps_cluster_data->s_centroid; |
| |
| other_pos_x_q8 = ps_centroid->i4_pos_x_q8; |
| other_pos_y_q8 = ps_centroid->i4_pos_y_q8; |
| |
| mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8); |
| mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8); |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvd = ABS(mvdx) + ABS(mvdy); |
| |
| if(mvd <= (max_dist_from_centroid >> 1)) |
| { |
| /* 0 => no updates */ |
| /* 1 => min updated */ |
| /* 2 => max updated */ |
| S32 minmax_x_update_id; |
| S32 minmax_y_update_id; |
| |
| LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs; |
| LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs; |
| LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs; |
| LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs; |
| |
| (*pu1_num_clusters)--; |
| |
| ps_cluster_data->is_valid_cluster = 0; |
| |
| memcpy( |
| &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs], |
| ps_cluster_data->as_mv, |
| sizeof(mv_data_t) * ps_cluster_data->num_mvs); |
| |
| ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs; |
| ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels; |
| ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; |
| ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; |
| i8_mv_x_sum_self += i8_mv_x_sum_cousin; |
| i8_mv_y_sum_self += i8_mv_y_sum_cousin; |
| |
| ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs); |
| ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs); |
| |
| minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x) |
| ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2) |
| : 1; |
| minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y) |
| ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2) |
| : 1; |
| |
| /* Updation of centroid spread */ |
| switch(minmax_x_update_id + (minmax_y_update_id << 2)) |
| { |
| case 1: |
| { |
| S32 mvd, mvd_q8; |
| |
| ps_cur_cluster->min_x = ps_cluster_data->min_x; |
| |
| mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8); |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (max_dist_from_centroid)) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 2: |
| { |
| S32 mvd, mvd_q8; |
| |
| ps_cur_cluster->max_x = ps_cluster_data->max_x; |
| |
| mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8; |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (max_dist_from_centroid)) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 4: |
| { |
| S32 mvd, mvd_q8; |
| |
| ps_cur_cluster->min_y = ps_cluster_data->min_y; |
| |
| mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8); |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (max_dist_from_centroid)) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 5: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8); |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8); |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| ps_cur_cluster->min_x = ps_cluster_data->min_x; |
| ps_cur_cluster->min_y = ps_cluster_data->min_y; |
| |
| if(mvd > max_dist_from_centroid) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 6: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8); |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8; |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| ps_cur_cluster->max_x = ps_cluster_data->max_x; |
| ps_cur_cluster->min_y = ps_cluster_data->min_y; |
| |
| if(mvd > max_dist_from_centroid) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 8: |
| { |
| S32 mvd, mvd_q8; |
| |
| ps_cur_cluster->max_y = ps_cluster_data->max_y; |
| |
| mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8; |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (max_dist_from_centroid)) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 9: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8); |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8; |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| ps_cur_cluster->min_x = ps_cluster_data->min_x; |
| ps_cur_cluster->max_y = ps_cluster_data->max_y; |
| |
| if(mvd > max_dist_from_centroid) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 10: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8; |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8; |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| ps_cur_cluster->max_x = ps_cluster_data->max_x; |
| ps_cur_cluster->max_y = ps_cluster_data->max_y; |
| |
| if(mvd > ps_cluster_data->max_dist_from_centroid) |
| { |
| ps_cluster_data->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| default: |
| { |
| break; |
| } |
| } |
| |
| hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster); |
| |
| return; |
| } |
| |
| num_clusters_evaluated++; |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_find_and_update_clusters |
| * ( |
| * cluster_data_t *ps_cluster_data, |
| * S32 *pi4_num_clusters, |
| * S32 mvx, |
| * S32 mvy, |
| * S32 ref_idx, |
| * PART_ID_T e_part_id |
| * ) |
| * |
| * @brief Implementation fo the clustering algorithm |
| * |
| * @param[in/out] ps_cluster_data: pointer to cluster_data_t struct |
| * |
| * @param[in/out] pi4_num_clusters : pointer to number of clusters |
| * |
| * @param[in] mvx : x co-ordinate of the motion vector |
| * |
| * @param[in] mvy : y co-ordinate of the motion vector |
| * |
| * @param[in] ref_idx : ref_id of the motion vector |
| * |
| * @param[in] e_part_id : partition id of the motion vector |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_find_and_update_clusters( |
| cluster_data_t *ps_cluster_data, |
| U08 *pu1_num_clusters, |
| S16 i2_mv_x, |
| S16 i2_mv_y, |
| U08 i1_ref_idx, |
| S32 i4_sdi, |
| PART_ID_T e_part_id, |
| U08 is_part_of_bi) |
| { |
| S32 i; |
| S32 min_mvd_cluster_id = -1; |
| S32 mvd, mvd_limit, mvdx, mvdy; |
| S32 min_mvdx, min_mvdy; |
| |
| S32 min_mvd = MAX_32BIT_VAL; |
| S32 num_clusters = *pu1_num_clusters; |
| |
| S32 mvx = i2_mv_x; |
| S32 mvy = i2_mv_y; |
| S32 ref_idx = i1_ref_idx; |
| S32 sdi = i4_sdi; |
| S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16; |
| |
| if(num_clusters == 0) |
| { |
| cluster_data_t *ps_data = &ps_cluster_data[num_clusters]; |
| |
| ps_data->num_mvs = 1; |
| ps_data->s_centroid.i4_pos_x_q8 = mvx << 8; |
| ps_data->s_centroid.i4_pos_y_q8 = mvy << 8; |
| ps_data->ref_id = ref_idx; |
| ps_data->area_in_pixels = gai4_partition_area[e_part_id]; |
| ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id]; |
| ps_data->as_mv[0].mvx = mvx; |
| ps_data->as_mv[0].mvy = mvy; |
| |
| /***************************/ |
| ps_data->as_mv[0].is_uni = !is_part_of_bi; |
| ps_data->as_mv[0].sdi = sdi; |
| if(is_part_of_bi) |
| { |
| ps_data->bi_mv_pixel_area += ps_data->area_in_pixels; |
| } |
| else |
| { |
| ps_data->uni_mv_pixel_area += ps_data->area_in_pixels; |
| } |
| /**************************/ |
| ps_data->max_x = mvx; |
| ps_data->min_x = mvx; |
| ps_data->max_y = mvy; |
| ps_data->min_y = mvy; |
| |
| ps_data->is_valid_cluster = 1; |
| |
| *pu1_num_clusters = 1; |
| } |
| else |
| { |
| S32 num_clusters_evaluated = 0; |
| |
| for(i = 0; num_clusters_evaluated < num_clusters; i++) |
| { |
| cluster_data_t *ps_data = &ps_cluster_data[i]; |
| |
| centroid_t *ps_centroid; |
| |
| S32 mvx_q8; |
| S32 mvy_q8; |
| S32 posx_q8; |
| S32 posy_q8; |
| S32 mvdx_q8; |
| S32 mvdy_q8; |
| |
| /* In anticipation of a possible merging of clusters */ |
| if(ps_data->is_valid_cluster == 0) |
| { |
| new_cluster_idx = i; |
| continue; |
| } |
| |
| if(ref_idx != ps_data->ref_id) |
| { |
| num_clusters_evaluated++; |
| continue; |
| } |
| |
| ps_centroid = &ps_data->s_centroid; |
| posx_q8 = ps_centroid->i4_pos_x_q8; |
| posy_q8 = ps_centroid->i4_pos_y_q8; |
| |
| mvx_q8 = mvx << 8; |
| mvy_q8 = mvy << 8; |
| |
| mvdx_q8 = posx_q8 - mvx_q8; |
| mvdy_q8 = posy_q8 - mvy_q8; |
| |
| mvdx = (((mvdx_q8 + (1 << 7)) >> 8)); |
| mvdy = (((mvdy_q8 + (1 << 7)) >> 8)); |
| |
| mvd = ABS(mvdx) + ABS(mvdy); |
| |
| if(mvd < min_mvd) |
| { |
| min_mvd = mvd; |
| min_mvdx = mvdx; |
| min_mvdy = mvdy; |
| min_mvd_cluster_id = i; |
| } |
| |
| num_clusters_evaluated++; |
| } |
| |
| mvd_limit = (min_mvd_cluster_id == -1) |
| ? ps_cluster_data[0].max_dist_from_centroid |
| : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid; |
| |
| /* This condition implies that min_mvd has been updated */ |
| if(min_mvd <= mvd_limit) |
| { |
| hme_update_cluster_attributes( |
| &ps_cluster_data[min_mvd_cluster_id], |
| mvx, |
| mvy, |
| min_mvdx, |
| min_mvdy, |
| ref_idx, |
| sdi, |
| is_part_of_bi, |
| e_part_id); |
| |
| if(PRT_NxN == ge_part_id_to_part_type[e_part_id]) |
| { |
| hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id); |
| } |
| } |
| else |
| { |
| cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16) |
| ? &ps_cluster_data[num_clusters] |
| : &ps_cluster_data[new_cluster_idx]; |
| |
| ps_data->num_mvs = 1; |
| ps_data->s_centroid.i4_pos_x_q8 = mvx << 8; |
| ps_data->s_centroid.i4_pos_y_q8 = mvy << 8; |
| ps_data->ref_id = ref_idx; |
| ps_data->area_in_pixels = gai4_partition_area[e_part_id]; |
| ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id]; |
| ps_data->as_mv[0].mvx = mvx; |
| ps_data->as_mv[0].mvy = mvy; |
| |
| /***************************/ |
| ps_data->as_mv[0].is_uni = !is_part_of_bi; |
| ps_data->as_mv[0].sdi = sdi; |
| if(is_part_of_bi) |
| { |
| ps_data->bi_mv_pixel_area += ps_data->area_in_pixels; |
| } |
| else |
| { |
| ps_data->uni_mv_pixel_area += ps_data->area_in_pixels; |
| } |
| /**************************/ |
| ps_data->max_x = mvx; |
| ps_data->min_x = mvx; |
| ps_data->max_y = mvy; |
| ps_data->min_y = mvy; |
| |
| ps_data->is_valid_cluster = 1; |
| |
| num_clusters++; |
| *pu1_num_clusters = num_clusters; |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_update_32x32_cluster_attributes |
| * ( |
| * cluster_32x32_blk_t *ps_blk_32x32, |
| * cluster_data_t *ps_cluster_data |
| * ) |
| * |
| * @brief Updates attributes for 32x32 clusters based on the attributes of |
| * the constituent 16x16 clusters |
| * |
| * @param[out] ps_blk_32x32: structure containing 32x32 block results |
| * |
| * @param[in] ps_cluster_data : structure containing 16x16 block results |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_update_32x32_cluster_attributes( |
| cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data) |
| { |
| cluster_data_t *ps_cur_cluster_32; |
| |
| S32 i; |
| S32 mvd_limit; |
| |
| S32 num_clusters = ps_blk_32x32->num_clusters; |
| |
| if(0 == num_clusters) |
| { |
| ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0]; |
| |
| ps_blk_32x32->num_clusters++; |
| ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++; |
| |
| ps_cur_cluster_32->is_valid_cluster = 1; |
| |
| ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels; |
| ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; |
| ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; |
| |
| memcpy( |
| ps_cur_cluster_32->as_mv, |
| ps_cluster_data->as_mv, |
| sizeof(mv_data_t) * ps_cluster_data->num_mvs); |
| |
| ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs; |
| |
| ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id; |
| |
| ps_cur_cluster_32->max_x = ps_cluster_data->max_x; |
| ps_cur_cluster_32->max_y = ps_cluster_data->max_y; |
| ps_cur_cluster_32->min_x = ps_cluster_data->min_x; |
| ps_cur_cluster_32->min_y = ps_cluster_data->min_y; |
| |
| ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid; |
| } |
| else |
| { |
| centroid_t *ps_centroid; |
| |
| S32 cur_posx_q8, cur_posy_q8; |
| S32 min_mvd_cluster_id = -1; |
| S32 mvd; |
| S32 mvdx; |
| S32 mvdy; |
| S32 mvdx_min; |
| S32 mvdy_min; |
| S32 mvdx_q8; |
| S32 mvdy_q8; |
| |
| S32 num_clusters_evaluated = 0; |
| |
| S32 mvd_min = MAX_32BIT_VAL; |
| |
| S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8; |
| S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8; |
| |
| for(i = 0; num_clusters_evaluated < num_clusters; i++) |
| { |
| ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i]; |
| |
| if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id) |
| { |
| num_clusters_evaluated++; |
| continue; |
| } |
| if(!ps_cluster_data->is_valid_cluster) |
| { |
| continue; |
| } |
| |
| num_clusters_evaluated++; |
| |
| ps_centroid = &ps_cur_cluster_32->s_centroid; |
| |
| cur_posx_q8 = ps_centroid->i4_pos_x_q8; |
| cur_posy_q8 = ps_centroid->i4_pos_y_q8; |
| |
| mvdx_q8 = cur_posx_q8 - mvx_inp_q8; |
| mvdy_q8 = cur_posy_q8 - mvy_inp_q8; |
| |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvd = ABS(mvdx) + ABS(mvdy); |
| |
| if(mvd < mvd_min) |
| { |
| mvd_min = mvd; |
| mvdx_min = mvdx; |
| mvdy_min = mvdy; |
| min_mvd_cluster_id = i; |
| } |
| } |
| |
| ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0]; |
| |
| mvd_limit = (min_mvd_cluster_id == -1) |
| ? ps_cur_cluster_32[0].max_dist_from_centroid |
| : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid; |
| |
| if(mvd_min <= mvd_limit) |
| { |
| LWORD64 i8_updated_posx; |
| LWORD64 i8_updated_posy; |
| WORD32 minmax_updated_x = 0; |
| WORD32 minmax_updated_y = 0; |
| |
| ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id]; |
| |
| ps_centroid = &ps_cur_cluster_32->s_centroid; |
| |
| ps_cur_cluster_32->is_valid_cluster = 1; |
| |
| ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels; |
| ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; |
| ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; |
| |
| memcpy( |
| &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs], |
| ps_cluster_data->as_mv, |
| sizeof(mv_data_t) * ps_cluster_data->num_mvs); |
| |
| if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8)) |
| { |
| ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8; |
| minmax_updated_x = 1; |
| } |
| else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8)) |
| { |
| ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8; |
| minmax_updated_x = 2; |
| } |
| |
| if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8)) |
| { |
| ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8; |
| minmax_updated_y = 1; |
| } |
| else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8)) |
| { |
| ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8; |
| minmax_updated_y = 2; |
| } |
| |
| switch((minmax_updated_y << 2) + minmax_updated_x) |
| { |
| case 1: |
| { |
| S32 mvd, mvd_q8; |
| |
| mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (mvd_limit)) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 2: |
| { |
| S32 mvd, mvd_q8; |
| |
| mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (mvd_limit)) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 4: |
| { |
| S32 mvd, mvd_q8; |
| |
| mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (mvd_limit)) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 5: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| if(mvd > mvd_limit) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 6: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8); |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| if(mvd > mvd_limit) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 8: |
| { |
| S32 mvd, mvd_q8; |
| |
| mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; |
| mvd = (mvd_q8 + (1 << 7)) >> 8; |
| |
| if(mvd > (mvd_limit)) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 9: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8); |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| if(mvd > mvd_limit) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| case 10: |
| { |
| S32 mvd; |
| S32 mvdx, mvdx_q8; |
| S32 mvdy, mvdy_q8; |
| |
| mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8; |
| mvdx = (mvdx_q8 + (1 << 7)) >> 8; |
| |
| mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8; |
| mvdy = (mvdy_q8 + (1 << 7)) >> 8; |
| |
| mvd = (mvdx > mvdy) ? mvdx : mvdy; |
| |
| if(mvd > ps_cur_cluster_32->max_dist_from_centroid) |
| { |
| ps_cur_cluster_32->max_dist_from_centroid = mvd; |
| } |
| break; |
| } |
| default: |
| { |
| break; |
| } |
| } |
| |
| i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) + |
| ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs); |
| i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) + |
| ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs); |
| |
| ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs; |
| |
| ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs); |
| ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs); |
| } |
| else if(num_clusters < MAX_NUM_CLUSTERS_32x32) |
| { |
| ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters]; |
| |
| ps_blk_32x32->num_clusters++; |
| ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++; |
| |
| ps_cur_cluster_32->is_valid_cluster = 1; |
| |
| ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels; |
| ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area; |
| ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area; |
| |
| memcpy( |
| ps_cur_cluster_32->as_mv, |
| ps_cluster_data->as_mv, |
| sizeof(mv_data_t) * ps_cluster_data->num_mvs); |
| |
| ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs; |
| |
| ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id; |
| |
| ps_cur_cluster_32->max_x = ps_cluster_data->max_x; |
| ps_cur_cluster_32->max_y = ps_cluster_data->max_y; |
| ps_cur_cluster_32->min_x = ps_cluster_data->min_x; |
| ps_cur_cluster_32->min_y = ps_cluster_data->min_y; |
| |
| ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid; |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_update_64x64_cluster_attributes |
| * ( |
| * cluster_64x64_blk_t *ps_blk_32x32, |
| * cluster_data_t *ps_cluster_data |
| * ) |
| * |
| * @brief Updates attributes for 64x64 clusters based on the attributes of |
| * the constituent 16x16 clusters |
| * |
| * @param[out] ps_blk_64x64: structure containing 64x64 block results |
| * |
| * @param[in] ps_cluster_data : structure containing 32x32 block results |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
void hme_update_64x64_cluster_attributes(
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
{
    cluster_data_t *ps_cur_cluster_64;

    S32 i;
    S32 mvd_limit;

    S32 num_clusters = ps_blk_64x64->num_clusters;

    /* First 32x32 cluster folded into this 64x64 block: copy it verbatim. */
    /* NOTE(review): bi/uni pixel areas use '+=' even on this copy path;   */
    /* assumes the destination was zero-initialised - confirm.             */
    if(0 == num_clusters)
    {
        ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];

        ps_blk_64x64->num_clusters++;
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;

        ps_cur_cluster_64->is_valid_cluster = 1;

        ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
        ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
        ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

        memcpy(
            ps_cur_cluster_64->as_mv,
            ps_cluster_data->as_mv,
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);

        ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;

        ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;

        ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
        ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
        ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
        ps_cur_cluster_64->min_y = ps_cluster_data->min_y;

        ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
    }
    else
    {
        centroid_t *ps_centroid;

        S32 cur_posx_q8, cur_posy_q8;
        S32 min_mvd_cluster_id = -1;
        S32 mvd;
        S32 mvdx;
        S32 mvdy;
        S32 mvdx_min;
        S32 mvdy_min;
        S32 mvdx_q8;
        S32 mvdy_q8;

        S32 num_clusters_evaluated = 0;

        S32 mvd_min = MAX_32BIT_VAL;

        /* Incoming 32x32 cluster's centroid in Q8 */
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;

        /* Find the valid 64x64 cluster with matching ref_id whose centroid */
        /* is nearest (L1 distance, Q8 rounded to full-pel) to the incoming */
        /* cluster's centroid                                               */
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
        {
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];

            if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
            {
                num_clusters_evaluated++;
                continue;
            }

            if(!ps_cur_cluster_64->is_valid_cluster)
            {
                continue;
            }

            num_clusters_evaluated++;

            ps_centroid = &ps_cur_cluster_64->s_centroid;

            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;

            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;

            /* Round Q8 deltas to full-pel before taking the L1 distance */
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;

            mvd = ABS(mvdx) + ABS(mvdy);

            if(mvd < mvd_min)
            {
                mvd_min = mvd;
                mvdx_min = mvdx;
                mvdy_min = mvdy;
                min_mvd_cluster_id = i;
            }
        }

        ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;

        /* Spread of the nearest cluster (slot 0's value is a don't-care */
        /* fallback: when no cluster matched, mvd_min stays MAX_32BIT_VAL */
        /* and the branch below takes the else path)                      */
        mvd_limit = (min_mvd_cluster_id == -1)
                        ? ps_cur_cluster_64[0].max_dist_from_centroid
                        : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;

        /* Within the spread of the nearest cluster => absorb the incoming one */
        if(mvd_min <= mvd_limit)
        {
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;
            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];

            ps_centroid = &ps_cur_cluster_64->s_centroid;

            ps_cur_cluster_64->is_valid_cluster = 1;

            ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            memcpy(
                &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            /* Grow the bounding box towards the incoming centroid; record */
            /* which side of each axis moved (1 => min, 2 => max)          */
            if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
            {
                ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 1;
            }
            else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
            {
                ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
            {
                ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
            {
                ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            /* Re-derive the cluster spread from whichever extents moved. */
            /* Case code = (y_update << 2) + x_update, with update codes  */
            /* 0 => none, 1 => min moved, 2 => max moved                  */
            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                /* Both mins moved: spread is the larger of the two reaches */
                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                /* NOTE(review): unlike the other cases this compares       */
                /* against the live field rather than the cached mvd_limit; */
                /* same asymmetry exists in the 32x32 twin - confirm intent */
                if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
                {
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* New centroid = MV-count weighted average of the two centroids */
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);

            ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
        }
        /* Too far from every cluster: open a new slot if any remain; */
        /* otherwise the incoming cluster is dropped                  */
        else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
        {
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];

            ps_blk_64x64->num_clusters++;
            ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;

            ps_cur_cluster_64->is_valid_cluster = 1;

            ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;

            memcpy(
                &ps_cur_cluster_64->as_mv[0],
                ps_cluster_data->as_mv,
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);

            ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;

            ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;

            ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
            ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
            ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
            ps_cur_cluster_64->min_y = ps_cluster_data->min_y;

            ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
        }
    }
}
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_update_32x32_clusters |
| * ( |
| * cluster_32x32_blk_t *ps_blk_32x32, |
| * cluster_16x16_blk_t *ps_blk_16x16 |
| * ) |
| * |
| * @brief Updates attributes for 32x32 clusters based on the attributes of |
| * the constituent 16x16 clusters |
| * |
| * @param[out] ps_blk_32x32: structure containing 32x32 block results |
| * |
| * @param[in] ps_blk_16x16 : structure containing 16x16 block results |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| static __inline void |
| hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16) |
| { |
| cluster_16x16_blk_t *ps_blk_16x16_cur; |
| cluster_data_t *ps_cur_cluster; |
| |
| S32 i, j; |
| S32 num_clusters_cur_16x16_blk; |
| |
| for(i = 0; i < 4; i++) |
| { |
| S32 num_clusters_evaluated = 0; |
| |
| ps_blk_16x16_cur = &ps_blk_16x16[i]; |
| |
| num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters; |
| |
| ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area; |
| |
| ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost; |
| |
| for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++) |
| { |
| ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j]; |
| |
| if(!ps_cur_cluster->is_valid_cluster) |
| { |
| continue; |
| } |
| |
| hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster); |
| |
| num_clusters_evaluated++; |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_update_64x64_clusters |
| * ( |
| * cluster_64x64_blk_t *ps_blk_64x64, |
| * cluster_32x32_blk_t *ps_blk_32x32 |
| * ) |
| * |
 * @brief Updates attributes for 64x64 clusters based on the attributes of
 *           the constituent 32x32 clusters
 *
 * @param[out] ps_blk_64x64: structure containing 64x64 block results
 *
 * @param[in] ps_blk_32x32 : structure containing 32x32 block results
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| static __inline void |
| hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32) |
| { |
| cluster_32x32_blk_t *ps_blk_32x32_cur; |
| cluster_data_t *ps_cur_cluster; |
| |
| S32 i, j; |
| S32 num_clusters_cur_32x32_blk; |
| |
| for(i = 0; i < 4; i++) |
| { |
| S32 num_clusters_evaluated = 0; |
| |
| ps_blk_32x32_cur = &ps_blk_32x32[i]; |
| |
| num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters; |
| |
| ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area; |
| ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost; |
| |
| for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++) |
| { |
| ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j]; |
| |
| if(!ps_cur_cluster->is_valid_cluster) |
| { |
| continue; |
| } |
| |
| hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster); |
| |
| num_clusters_evaluated++; |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_try_merge_clusters_blksize_gt_16 |
| * ( |
| * cluster_data_t *ps_cluster_data, |
| * S32 num_clusters |
| * ) |
| * |
| * @brief Merging clusters from blocks of size 32x32 and greater |
| * |
| * @param[in/out] ps_cluster_data: structure containing cluster data |
| * |
 * @param[in] num_clusters : number of clusters present in ps_cluster_data
| * |
| * @return Success or failure |
| ******************************************************************************** |
| */ |
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
{
    /* Tries to merge cluster 0 (the "pivot") with every other cluster in  */
    /* the array that uses the same reference index and whose centroid     */
    /* lies within half the pivot's max-distance limit. A merged cluster   */
    /* is invalidated in place and its MVs/areas are absorbed into the     */
    /* pivot; the scan then restarts from the beginning. Finally the       */
    /* function recurses with the next valid cluster as the new pivot.     */
    /* Returns the total number of merges performed.                       */
    centroid_t *ps_cur_centroid;
    cluster_data_t *ps_cur_cluster;

    S32 i, mvd;
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;

    /* Pivot cluster centroid; all candidates are compared against this */
    centroid_t *ps_centroid = &ps_cluster_data->s_centroid;

    /* Snapshot of the pivot's allowed distance from its centroid */
    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
    S32 ref_id = ps_cluster_data->ref_id;

    /* Pivot centroid position in Q8 fixed point */
    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
    S32 num_clusters_evaluated = 1;
    S32 ret_value = 0;

    /* Nothing to merge with */
    if(1 >= num_clusters)
    {
        return ret_value;
    }

    for(i = 1; num_clusters_evaluated < num_clusters; i++)
    {
        S32 cur_posx_q8;
        S32 cur_posy_q8;

        ps_cur_cluster = &ps_cluster_data[i];

        /* Only clusters referring to the same reference picture can merge */
        if((ref_id != ps_cur_cluster->ref_id))
        {
            num_clusters_evaluated++;
            continue;
        }

        /* NOTE(review): invalidated clusters are skipped without counting */
        /* towards num_clusters_evaluated, whereas ref-id mismatches above */
        /* do count - confirm this asymmetry is intentional                */
        if((!ps_cur_cluster->is_valid_cluster))
        {
            continue;
        }

        num_clusters_evaluated++;

        ps_cur_centroid = &ps_cur_cluster->s_centroid;

        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;

        /* Centroid separation, rounded from Q8 to integer pel units */
        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;

        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;

        /* L1 (city-block) distance between the two centroids */
        mvd = ABS(mvdx) + ABS(mvdy);

        /* Merge when the candidate's centroid lies within half the */
        /* pivot's max-distance limit                                */
        if(mvd <= (mvd_limit >> 1))
        {
            LWORD64 i8_updated_posx;
            LWORD64 i8_updated_posy;
            WORD32 minmax_updated_x = 0;
            WORD32 minmax_updated_y = 0;

            /* Absorb the candidate into the pivot */
            ps_cur_cluster->is_valid_cluster = 0;

            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;

            memcpy(
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
                ps_cur_cluster->as_mv,
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);

            /* Update the pivot's bounding box towards the absorbed centroid. */
            /* The sign of the centroid delta decides which edge moves.       */
            if(mvdx > 0)
            {
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 1;
            }
            else
            {
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
                minmax_updated_x = 2;
            }

            if(mvdy > 0)
            {
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 1;
            }
            else
            {
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
                minmax_updated_y = 2;
            }

            /* Re-derive max_dist_from_centroid for the edges just moved.   */
            /* Encoding: x -> 1 = min_x updated, 2 = max_x updated;         */
            /*           y -> 1 = min_y updated, 2 = max_y updated.         */
            /* NOTE(review): both flags are always set to 1 or 2 above, so  */
            /* only case values 5, 6, 9 and 10 appear reachable; cases 1,   */
            /* 2, 4 and 8 look like defensive dead code - confirm           */
            switch((minmax_updated_y << 2) + minmax_updated_x)
            {
            case 1:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 2:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 4:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 5:
            {
                /* min_x and min_y both moved: limit is the larger of the */
                /* two distances from the moved edges to the old centroid */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 6:
            {
                /* max_x and min_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 8:
            {
                S32 mvd, mvd_q8;

                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvd = (mvd_q8 + (1 << 7)) >> 8;

                if(mvd > (mvd_limit))
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 9:
            {
                /* min_x and max_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                if(mvd > mvd_limit)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            case 10:
            {
                /* max_x and max_y moved */
                S32 mvd;
                S32 mvdx, mvdx_q8;
                S32 mvdy, mvdy_q8;

                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;

                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;

                mvd = (mvdx > mvdy) ? mvdx : mvdy;

                /* NOTE(review): unlike the other cases, this compares    */
                /* against the live max_dist_from_centroid rather than    */
                /* the mvd_limit snapshot - confirm intentional           */
                if(mvd > ps_cluster_data->max_dist_from_centroid)
                {
                    ps_cluster_data->max_dist_from_centroid = mvd;
                }
                break;
            }
            default:
            {
                break;
            }
            }

            /* New centroid = MV-count-weighted average of both centroids */
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);

            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;

            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);

            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
            {
                /* Restart the scan with one cluster fewer; i is reset to 0 */
                /* because the loop increment brings it back to 1           */
                num_clusters--;
                num_clusters_evaluated = 1;
                i = 0;
                ret_value++;
            }
            else
            {
                ret_value++;

                return ret_value;
            }
        }
    }

    /* Locate the next valid cluster to serve as the pivot of the recursion */
    if(ret_value)
    {
        for(i = 1; i < (num_clusters + ret_value); i++)
        {
            if(ps_cluster_data[i].is_valid_cluster)
            {
                break;
            }
        }
        /* No valid cluster left after the pivot: recursion bottomed out */
        if(i == (num_clusters + ret_value))
        {
            return ret_value;
        }
    }
    else
    {
        i = 1;
    }

    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
           ret_value;
}
| |
| /** |
| ******************************************************************************** |
| * @fn S32 hme_determine_validity_32x32 |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info |
| * ) |
| * |
 * @brief Determines whether current 32x32 block needs to be evaluated in enc_loop
| * while recursing through the CU tree or not |
| * |
| * @param[in] ps_cluster_data: structure containing cluster data |
| * |
| * @return Success or failure |
| ******************************************************************************** |
| */ |
| __inline S32 hme_determine_validity_32x32( |
| ctb_cluster_info_t *ps_ctb_cluster_info, |
| S32 *pi4_children_nodes_required, |
| S32 blk_validity_wrt_pic_bndry, |
| S32 parent_blk_validity_wrt_pic_bndry) |
| { |
| cluster_data_t *ps_data; |
| |
| cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk; |
| cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk; |
| |
| S32 num_clusters = ps_32x32_blk->num_clusters; |
| S32 num_clusters_parent = ps_64x64_blk->num_clusters; |
| |
| if(!blk_validity_wrt_pic_bndry) |
| { |
| *pi4_children_nodes_required = 1; |
| return 0; |
| } |
| |
| if(!parent_blk_validity_wrt_pic_bndry) |
| { |
| *pi4_children_nodes_required = 1; |
| return 1; |
| } |
| |
| if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) |
| { |
| *pi4_children_nodes_required = 1; |
| return 0; |
| } |
| |
| if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK) |
| { |
| *pi4_children_nodes_required = 1; |
| |
| return 1; |
| } |
| else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK) |
| { |
| *pi4_children_nodes_required = 0; |
| |
| return 1; |
| } |
| else |
| { |
| if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) |
| { |
| *pi4_children_nodes_required = 0; |
| return 1; |
| } |
| else |
| { |
| S32 i; |
| |
| S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4; |
| S32 min_area = MAX_32BIT_VAL; |
| S32 num_clusters_evaluated = 0; |
| |
| for(i = 0; num_clusters_evaluated < num_clusters; i++) |
| { |
| ps_data = &ps_32x32_blk->as_cluster_data[i]; |
| |
| if(!ps_data->is_valid_cluster) |
| { |
| continue; |
| } |
| |
| num_clusters_evaluated++; |
| |
| if(ps_data->area_in_pixels < min_area) |
| { |
| min_area = ps_data->area_in_pixels; |
| } |
| } |
| |
| if((min_area << 4) < area_of_parent) |
| { |
| *pi4_children_nodes_required = 1; |
| return 0; |
| } |
| else |
| { |
| *pi4_children_nodes_required = 0; |
| return 1; |
| } |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn S32 hme_determine_validity_16x16 |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info |
| * ) |
| * |
 * @brief Determines whether current 16x16 block needs to be evaluated in enc_loop
| * while recursing through the CU tree or not |
| * |
| * @param[in] ps_cluster_data: structure containing cluster data |
| * |
| * @return Success or failure |
| ******************************************************************************** |
| */ |
| __inline S32 hme_determine_validity_16x16( |
| ctb_cluster_info_t *ps_ctb_cluster_info, |
| S32 *pi4_children_nodes_required, |
| S32 blk_validity_wrt_pic_bndry, |
| S32 parent_blk_validity_wrt_pic_bndry) |
| { |
| cluster_data_t *ps_data; |
| |
| cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk; |
| cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk; |
| cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk; |
| |
| S32 num_clusters = ps_16x16_blk->num_clusters; |
| S32 num_clusters_parent = ps_32x32_blk->num_clusters; |
| S32 num_clusters_grandparent = ps_64x64_blk->num_clusters; |
| |
| if(!blk_validity_wrt_pic_bndry) |
| { |
| *pi4_children_nodes_required = 1; |
| return 0; |
| } |
| |
| if(!parent_blk_validity_wrt_pic_bndry) |
| { |
| *pi4_children_nodes_required = 1; |
| return 1; |
| } |
| |
| if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) && |
| (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)) |
| { |
| *pi4_children_nodes_required = 1; |
| return 1; |
| } |
| |
| /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */ |
| /* implies nc_64 > 3 when num_clusters_parent < 3 & */ |
| if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) |
| { |
| if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK) |
| { |
| *pi4_children_nodes_required = 0; |
| |
| return 1; |
| } |
| else |
| { |
| *pi4_children_nodes_required = 1; |
| |
| return 0; |
| } |
| } |
| /* Implies nc_64 >= 3 */ |
| else |
| { |
| if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK) |
| { |
| *pi4_children_nodes_required = 0; |
| return 1; |
| } |
| else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK) |
| { |
| *pi4_children_nodes_required = 1; |
| return 0; |
| } |
| else |
| { |
| S32 i; |
| |
| S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2; |
| S32 min_area = MAX_32BIT_VAL; |
| S32 num_clusters_evaluated = 0; |
| |
| for(i = 0; num_clusters_evaluated < num_clusters; i++) |
| { |
| ps_data = &ps_16x16_blk->as_cluster_data[i]; |
| |
| if(!ps_data->is_valid_cluster) |
| { |
| continue; |
| } |
| |
| num_clusters_evaluated++; |
| |
| if(ps_data->area_in_pixels < min_area) |
| { |
| min_area = ps_data->area_in_pixels; |
| } |
| } |
| |
| if((min_area << 4) < area_of_parent) |
| { |
| *pi4_children_nodes_required = 1; |
| return 0; |
| } |
| else |
| { |
| *pi4_children_nodes_required = 0; |
| return 1; |
| } |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_build_cu_tree |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info, |
| * cur_ctb_cu_tree_t *ps_cu_tree, |
| * S32 tree_depth, |
| * CU_POS_T e_grand_parent_blk_pos, |
| * CU_POS_T e_parent_blk_pos, |
| * CU_POS_T e_cur_blk_pos |
| * ) |
| * |
| * @brief Recursive function for CU tree initialisation |
| * |
| * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters |
| * corresponding to all block sizes from 64x64 |
| * to 16x16 |
| * |
| * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if |
| * applicable |
| * |
| * @param[in] e_cur_blk_pos: position of current block wrt parent |
| * |
| * @param[out] ps_cu_tree : represents CU tree used in CU recursion |
| * |
| * @param[in] tree_depth : specifies depth of the CU tree |
| * |
| * @return Nothing |
| ******************************************************************************** |
| */ |
| void hme_build_cu_tree( |
| ctb_cluster_info_t *ps_ctb_cluster_info, |
| cur_ctb_cu_tree_t *ps_cu_tree, |
| S32 tree_depth, |
| CU_POS_T e_grandparent_blk_pos, |
| CU_POS_T e_parent_blk_pos, |
| CU_POS_T e_cur_blk_pos) |
| { |
| ihevce_cu_tree_init( |
| ps_cu_tree, |
| ps_ctb_cluster_info->ps_cu_tree_root, |
| &ps_ctb_cluster_info->nodes_created_in_cu_tree, |
| tree_depth, |
| e_grandparent_blk_pos, |
| e_parent_blk_pos, |
| e_cur_blk_pos); |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn S32 hme_sdi_based_cluster_spread_eligibility |
| * ( |
| * cluster_32x32_blk_t *ps_blk_32x32 |
| * ) |
| * |
| * @brief Determines whether the spread of high SDI MV's around each cluster |
| * center is below a pre-determined threshold |
| * |
| * @param[in] ps_blk_32x32: structure containing pointers to clusters |
| * corresponding to all block sizes from 64x64 |
| * to 16x16 |
| * |
| * @return 1 if the spread is constrained, else 0 |
| ******************************************************************************** |
| */ |
| __inline S32 |
| hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold) |
| { |
| S32 cumulative_mv_distance; |
| S32 i, j; |
| S32 num_high_sdi_mvs; |
| |
| S32 num_clusters = ps_blk_32x32->num_clusters; |
| |
| for(i = 0; i < num_clusters; i++) |
| { |
| cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i]; |
| |
| num_high_sdi_mvs = 0; |
| cumulative_mv_distance = 0; |
| |
| for(j = 0; j < ps_data->num_mvs; j++) |
| { |
| mv_data_t *ps_mv = &ps_data->as_mv[j]; |
| |
| if(ps_mv->sdi >= sdi_threshold) |
| { |
| num_high_sdi_mvs++; |
| |
| COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance); |
| } |
| } |
| |
| if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs)) |
| { |
| return 0; |
| } |
| } |
| |
| return 1; |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn S32 hme_populate_cu_tree |
| * ( |
| * ctb_cluster_info_t *ps_ctb_cluster_info, |
| * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb, |
| * cur_ctb_cu_tree_t *ps_cu_tree, |
| * S32 tree_depth, |
| * CU_POS_T e_parent_blk_pos, |
| * CU_POS_T e_cur_blk_pos |
| * ) |
| * |
| * @brief Recursive function for CU tree population based on output of |
| * clustering algorithm |
| * |
| * @param[in] ps_ctb_cluster_info: structure containing pointers to clusters |
| * corresponding to all block sizes from 64x64 |
| * to 16x16 |
| * |
| * @param[in] e_parent_blk_pos: position of parent block wrt its parent, if |
| applicable |
| * |
| * @param[in] e_cur_blk_pos: position of current block wrt parent |
| * |
| * @param[in] ps_cur_ipe_ctb : output container for ipe analyses |
| * |
| * @param[out] ps_cu_tree : represents CU tree used in CU recursion |
| * |
| * @param[in] tree_depth : specifies depth of the CU tree |
| * |
 * @param[in] e_quality_preset : ME quality preset; at ME_HIGH_QUALITY and
 *            above a simplified validity decision is used at each depth.
 *
 * @return Nothing
| ******************************************************************************** |
| */ |
void hme_populate_cu_tree(
    ctb_cluster_info_t *ps_ctb_cluster_info,
    cur_ctb_cu_tree_t *ps_cu_tree,
    S32 tree_depth,
    ME_QUALITY_PRESETS_T e_quality_preset,
    CU_POS_T e_grandparent_blk_pos,
    CU_POS_T e_parent_blk_pos,
    CU_POS_T e_cur_blk_pos)
{
    /* Recursively fills one node of the CU tree: decides whether the node  */
    /* is valid for CU recursion (is_node_valid), whether intra and inter   */
    /* evaluation are enabled for it, and whether its four child nodes must */
    /* also be populated. tree_depth 0/1/2/3 corresponds to 64x64, 32x32,   */
    /* 16x16 and 8x8 blocks respectively.                                   */
    S32 area_of_cur_blk;
    S32 area_limit_for_me_decision_precedence;
    S32 children_nodes_required;
    S32 intra_mv_area;
    S32 intra_eval_enable;
    S32 inter_eval_enable;
    S32 ipe_decision_precedence;
    S32 node_validity;
    S32 num_clusters;

    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;

    /* Recursion bottoms out on absent children */
    if(NULL == ps_cu_tree)
    {
        return;
    }

    switch(tree_depth)
    {
    case 0:
    {
        /* 64x64 block */
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;

        cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;

        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
        children_nodes_required = 0;
        intra_mv_area = ps_blk_64x64->intra_mv_area;

        /* IPE's intra decision takes precedence when intra dominates the block */
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);

        intra_eval_enable = ipe_decision_precedence;
        inter_eval_enable = !!ps_blk_64x64->num_clusters;

#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        /* Fast presets: node valid iff all four 32x32 children are inside */
        /* the picture; cluster statistics are ignored                     */
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            inter_eval_enable = 1;
            node_validity = (blk_32x32_mask == 0xf);
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
#endif
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = (blk_32x32_mask == 0xf);

        break;
#else
        {
            S32 i;

            num_clusters = ps_blk_64x64->num_clusters;

            /* Intra-dominated: follow IPE's split decision; otherwise use */
            /* the cluster-count heuristic                                 */
            node_validity = (ipe_decision_precedence)
                                ? (!ps_cur_ipe_ctb->u1_split_flag)
                                : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);

            /* Additionally require a bounded cluster count per reference */
            for(i = 0; i < MAX_NUM_REF; i++)
            {
                node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
                                                  MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
            }

            node_validity = node_validity && (blk_32x32_mask == 0xf);
        }
        break;
#endif
    }
    case 1:
    {
        /* 32x32 block */
        S32 is_percent_intra_area_gt_threshold;

        cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];

        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;

#if !ENABLE_4CTB_EVALUATION
        /* Qstep-scaled intra cost; MAX_32BIT_VAL on signed overflow */
        S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
        S32 best_intra_cost =
            ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
              ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
                  4) < 0)
                ? MAX_32BIT_VAL
                : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
                   ps_ctb_cluster_info->i4_frame_qstep *
                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
        S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
        S32 cost_differential = (best_inter_cost - best_cost);
#endif

        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
        intra_mv_area = ps_blk_32x32->intra_mv_area;
        is_percent_intra_area_gt_threshold =
            (intra_mv_area > area_limit_for_me_decision_precedence);
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);

        intra_eval_enable = ipe_decision_precedence;
        inter_eval_enable = !!ps_blk_32x32->num_clusters;
        children_nodes_required = 1;

#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        /* Fast presets: validity follows the picture-boundary mask only */
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            inter_eval_enable = 1;
            node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
#endif
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);

        break;
#else
        {
            S32 i;
            num_clusters = ps_blk_32x32->num_clusters;

            if(ipe_decision_precedence)
            {
                /* Intra-dominated: follow IPE's 32x32 merge decision */
                node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
                node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
            }
            else
            {
                /* Inter path: inter cost must be close enough to the best */
                /* cost, cluster counts bounded, block inside the picture  */
                node_validity =
                    ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
                    (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
                    (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);

                for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
                {
                    node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
                                                      MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
                }

                /* Finally require the high-SDI MV spread to be constrained */
                if(node_validity)
                {
                    node_validity = node_validity &&
                                    hme_sdi_based_cluster_spread_eligibility(
                                        ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
                }
            }
        }

        break;
#endif
    }
    case 2:
    {
        /* 16x16 block */
        cluster_16x16_blk_t *ps_blk_16x16 =
            &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];

        S32 blk_8x8_mask =
            ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];

        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
        children_nodes_required = 1;
        intra_mv_area = ps_blk_16x16->intra_mv_area;
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
        num_clusters = ps_blk_16x16->num_clusters;

        intra_eval_enable = ipe_decision_precedence;
        inter_eval_enable = 1;

#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        /* Fast presets: node valid iff the 16x16 was not split; children */
        /* are needed only when it was                                    */
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            node_validity =
                !ps_ctb_cluster_info
                     ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
            children_nodes_required = !node_validity;
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = (blk_8x8_mask == 0xf);

#if ENABLE_CU_TREE_CULLING
        {
            cur_ctb_cu_tree_t *ps_32x32_root;

            /* NOTE(review): no default case - ps_32x32_root is assigned   */
            /* only for the four CU_POS_T corner values; confirm           */
            /* e_parent_blk_pos is always POS_TL..POS_BR here              */
            switch(e_parent_blk_pos)
            {
            case POS_TL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                break;
            }
            case POS_TR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                break;
            }
            case POS_BL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                break;
            }
            case POS_BR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                break;
            }
            }

            /* If the parent 32x32 is valid, prune this node's children */
            /* unless the 16x16 was split                               */
            if(ps_32x32_root->is_node_valid)
            {
                node_validity =
                    node_validity &&
                    !ps_ctb_cluster_info
                         ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
                children_nodes_required = !node_validity;
            }
        }
#endif

        break;
#else

        if(ipe_decision_precedence)
        {
            S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
                                     .as_intra16_analyse[e_cur_blk_pos]
                                     .b1_merge_flag);
            S32 valid_flag = (blk_8x8_mask == 0xf);

            node_validity = merge_flag_16 && valid_flag;
        }
        else
        {
            node_validity = (blk_8x8_mask == 0xf);
        }

        break;
#endif
    }
    case 3:
    {
        /* 8x8 block */
        S32 blk_8x8_mask =
            ps_ctb_cluster_info
                ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
        S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
                                 .as_intra16_analyse[e_parent_blk_pos]
                                 .b1_merge_flag);
        S32 merge_flag_32 =
            (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);

        intra_eval_enable = !merge_flag_16 || !merge_flag_32;
        inter_eval_enable = 1;
        children_nodes_required = 0;

#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
            break;
        }
#endif

#if ENABLE_4CTB_EVALUATION
        node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);

        break;
#else
        {
            cur_ctb_cu_tree_t *ps_32x32_root;
            cur_ctb_cu_tree_t *ps_16x16_root;
            cluster_32x32_blk_t *ps_32x32_blk;

            /* NOTE(review): both switches below lack a default case, so   */
            /* the root pointers are assigned only for POS_TL..POS_BR -    */
            /* confirm the position enums are always in that range here    */
            switch(e_grandparent_blk_pos)
            {
            case POS_TL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;

                break;
            }
            case POS_TR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;

                break;
            }
            case POS_BL:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;

                break;
            }
            case POS_BR:
            {
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;

                break;
            }
            }

            switch(e_parent_blk_pos)
            {
            case POS_TL:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_tl;

                break;
            }
            case POS_TR:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_tr;

                break;
            }
            case POS_BL:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_bl;

                break;
            }
            case POS_BR:
            {
                ps_16x16_root = ps_32x32_root->ps_child_node_br;

                break;
            }
            }

            ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];

            /* 8x8 valid only inside the picture and when some ancestor is */
            /* invalid or the 32x32 has weak-SDI-density clusters          */
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
                            ((!ps_32x32_root->is_node_valid) ||
                             (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
                             (!ps_16x16_root->is_node_valid));

            break;
        }
#endif
    }
    }

    /* Fill the current cu_tree node */
    ps_cu_tree->is_node_valid = node_validity;
    ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
    ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;

    /* Recurse into the four children at the next depth */
    if(children_nodes_required)
    {
        tree_depth++;

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_tl,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_TL);

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_tr,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_TR);

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_bl,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_BL);

        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_cu_tree->ps_child_node_br,
            tree_depth,
            e_quality_preset,
            e_parent_blk_pos,
            e_cur_blk_pos,
            POS_BR);
    }
}
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_analyse_mv_clustering |
| * ( |
| * search_results_t *ps_search_results, |
| * ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb, |
| * cur_ctb_cu_tree_t *ps_cu_tree |
| * ) |
| * |
| * @brief Implementation for the clustering algorithm |
| * |
| * @param[in] ps_search_results: structure containing 16x16 block results |
| * |
| * @param[in] ps_cur_ipe_ctb : output container for ipe analyses |
| * |
| * @param[out] ps_cu_tree : represents CU tree used in CU recursion |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
void hme_analyse_mv_clustering(
    search_results_t *ps_search_results,
    inter_cu_results_t *ps_16x16_cu_results,
    inter_cu_results_t *ps_8x8_cu_results,
    ctb_cluster_info_t *ps_ctb_cluster_info,
    S08 *pi1_future_list,
    S08 *pi1_past_list,
    S32 bidir_enabled,
    ME_QUALITY_PRESETS_T e_quality_preset)
{
    /* Cluster records for the 16 16x16 blocks, 4 32x32 blocks and the */
    /* single 64x64 block of this CTB */
    cluster_16x16_blk_t *ps_blk_16x16;
    cluster_32x32_blk_t *ps_blk_32x32;
    cluster_64x64_blk_t *ps_blk_64x64;

    part_type_results_t *ps_best_result;
    /* Best PU result per partition and the corresponding second-best */
    /* ('inferior') result, used as the inter cost when best is intra */
    pu_result_t *aps_part_result[MAX_NUM_PARTS];
    pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];

    PART_ID_T e_part_id;
    PART_TYPE_T e_part_type;

    S32 enable_64x64_merge;
    S32 i, j, k;
    S32 mvx, mvy;
    S32 num_parts;
    S32 ref_idx;
    /* pu.b2_pred_mode per partition: 0 => L0 MV, 2 => bi (2 MVs), */
    /* else L1 MV (see the is_l0_mv/num_mvs derivations below) */
    S32 ai4_pred_mode[MAX_NUM_PARTS];

    /* Number of 32x32 CU-tree nodes found valid (0-4) */
    S32 num_32x32_merges = 0;

    /*****************************************/
    /*****************************************/
    /********* Enter ye who is HQ ************/
    /*****************************************/
    /*****************************************/

    ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;

    /* Initialise data in each of the clusters */
    for(i = 0; i < 16; i++)
    {
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        /* Faster presets (>= ME_HIGH_QUALITY) skip clustering and only */
        /* need the cost/area accumulators, so avoid the full reset */
        if(e_quality_preset < ME_HIGH_QUALITY)
        {
            hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
        }
        else
        {
            ps_blk_16x16->best_inter_cost = 0;
            ps_blk_16x16->intra_mv_area = 0;
        }
#else
        hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
#endif
    }

    for(i = 0; i < 4; i++)
    {
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        /* Same preset-dependent init as for the 16x16 clusters above */
        if(e_quality_preset < ME_HIGH_QUALITY)
        {
            hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
        }
        else
        {
            ps_blk_32x32->best_inter_cost = 0;
            ps_blk_32x32->intra_mv_area = 0;
        }
#else
        hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
#endif
    }

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
    if(e_quality_preset < ME_HIGH_QUALITY)
    {
        hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
    }
    else
    {
        ps_blk_64x64->best_inter_cost = 0;
        ps_blk_64x64->intra_mv_area = 0;
    }
#else
    hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
#endif

    /* Initialise data for all nodes in the CU tree */
    hme_build_cu_tree(
        ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);

    /* Faster presets mark every 16x16 as split up-front */
    if(e_quality_preset >= ME_HIGH_QUALITY)
    {
        memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
    }

    /* With a uniform CU size there is nothing to cluster or merge; the */
    /* tree built above is sufficient */
#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
    return;
#endif

    /* Pass 1: accumulate MV clusters and inter/intra statistics for each */
    /* valid 16x16 block, using 8x8 results when the 16x16 CU is split */
    for(i = 0; i < 16; i++)
    {
        S32 blk_8x8_mask;
        S32 is_16x16_blk_valid;
        S32 num_clusters_updated;
        S32 num_clusters;

        blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];

        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];

        /* 0xf <=> all four 8x8 children are inside the picture */
        is_16x16_blk_valid = (blk_8x8_mask == 0xf);

        if(is_16x16_blk_valid)
        {
            /* Use 8x8 data when 16x16 CU is split */
            if(ps_search_results[i].u1_split_flag)
            {
                S32 blk_8x8_idx = i << 2;

                num_parts = 4;
                e_part_type = PRT_NxN;

                for(j = 0; j < num_parts; j++, blk_8x8_idx++)
                {
                    /* Only 2Nx2N partition supported for 8x8 block */
                    ASSERT(
                        ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
                        ((PART_TYPE_T)PRT_2Nx2N));

                    aps_part_result[j] =
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
                    aps_inferior_parts[j] =
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
                }
            }
            else
            {
                ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];

                e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
                num_parts = gau1_num_parts_in_part_type[e_part_type];

                for(j = 0; j < num_parts; j++)
                {
                    aps_part_result[j] = &ps_best_result->as_pu_results[j];
                    /* ps_best_result[1] is the second-best part-type result */
                    aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
                }

                ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
            }

            for(j = 0; j < num_parts; j++)
            {
                pu_result_t *ps_part_result = aps_part_result[j];

                /* 2 MVs only for bi prediction (pred mode 2), else 1 */
                S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);

                e_part_id = ge_part_type_to_part_id[e_part_type][j];

                /* Skip clustering if best mode is intra */
                if((ps_part_result->pu.b1_intra_flag))
                {
                    ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
                    /* Fall back to the inferior (inter) result's cost so */
                    /* best_inter_cost stays a pure inter-cost accumulator */
                    ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
                    continue;
                }
                else
                {
                    ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
                }

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
                /* Faster presets accumulate costs only; no clustering */
                if(e_quality_preset >= ME_HIGH_QUALITY)
                {
                    continue;
                }
#endif

                for(k = 0; k < num_mvs; k++)
                {
                    mv_t *ps_mv;

                    pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;

                    /* L0 MV for uni-L0 (mode 0) and for the first MV of a */
                    /* bi-predicted (mode 2) partition; L1 MV otherwise */
                    S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);

                    ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);

                    mvx = ps_mv->i2_mvx;
                    mvy = ps_mv->i2_mvy;

                    /* Map the list-relative ref idx to a global ref id */
                    ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
                                         : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];

                    num_clusters = ps_blk_16x16->num_clusters;

                    hme_find_and_update_clusters(
                        ps_blk_16x16->as_cluster_data,
                        &(ps_blk_16x16->num_clusters),
                        mvx,
                        mvy,
                        ref_idx,
                        ps_part_result->i4_sdi,
                        e_part_id,
                        (ai4_pred_mode[j] == 2));

                    num_clusters_updated = (ps_blk_16x16->num_clusters);

                    /* Track how many clusters were added per reference */
                    ps_blk_16x16->au1_num_clusters[ref_idx] +=
                        (num_clusters_updated - num_clusters);
                }
            }
        }
    }

    /* Search for 32x32 clusters */
    for(i = 0; i < 4; i++)
    {
        S32 num_clusters_merged;

        /* '|| 0' coerces the mask test to a 0/1 boolean */
        S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;

        if(is_32x32_blk_valid)
        {
            ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
            /* First of the four 16x16 children of this 32x32 block */
            ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
            /* Faster presets: just aggregate child costs/areas upwards */
            if(e_quality_preset >= ME_HIGH_QUALITY)
            {
                for(j = 0; j < 4; j++, ps_blk_16x16++)
                {
                    ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;

                    ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
                }
                continue;
            }
#endif

            hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);

            /* Too many clusters: attempt pairwise merges to compact them */
            if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
            {
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
                    ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));

                if(num_clusters_merged)
                {
                    ps_blk_32x32->num_clusters -= num_clusters_merged;

                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
                }
            }
        }
    }

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
    /* Eliminate outlier 32x32 clusters */
    if(e_quality_preset < ME_HIGH_QUALITY)
#endif
    {
        hme_boot_out_outlier(ps_ctb_cluster_info, 32);

        /* Find best_uni_ref and best_alt_ref */
        hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
    }

    /* Populate the CU tree for depths 1 and higher */
    {
        cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
        cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
        cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
        cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
        cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;

        /* Each valid depth-1 (32x32) node counts towards the 64x64 */
        /* merge decision made below */
        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);

        num_32x32_merges += (ps_tl->is_node_valid == 1);

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);

        num_32x32_merges += (ps_tr->is_node_valid == 1);

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);

        num_32x32_merges += (ps_bl->is_node_valid == 1);

        hme_populate_cu_tree(
            ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);

        num_32x32_merges += (ps_br->is_node_valid == 1);
    }

    /* NOTE: between this pair of preprocessor-selected branches, every */
    /* preset assigns enable_64x64_merge exactly once (< HQ here, >= HQ */
    /* in the block that follows) */
#if !ENABLE_4CTB_EVALUATION
    if(e_quality_preset < ME_HIGH_QUALITY)
    {
        enable_64x64_merge = (num_32x32_merges >= 3);
    }
#else
    if(e_quality_preset < ME_HIGH_QUALITY)
    {
        enable_64x64_merge = 1;
    }
#endif

#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
    if(e_quality_preset >= ME_HIGH_QUALITY)
    {
        enable_64x64_merge = 1;
    }
#else
    if(e_quality_preset >= ME_HIGH_QUALITY)
    {
        enable_64x64_merge = (num_32x32_merges >= 3);
    }
#endif

    if(enable_64x64_merge)
    {
        S32 num_clusters_merged;

        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];

#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        /* Faster presets: aggregate the 32x32 costs/areas into the 64x64 */
        if(e_quality_preset >= ME_HIGH_QUALITY)
        {
            for(j = 0; j < 4; j++, ps_blk_32x32++)
            {
                ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;

                ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
            }
        }
        else
#endif
        {
            hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);

            if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
            {
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
                    ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));

                if(num_clusters_merged)
                {
                    ps_blk_64x64->num_clusters -= num_clusters_merged;

                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
                }
            }
        }

#if !ENABLE_4CTB_EVALUATION
        if(e_quality_preset < ME_HIGH_QUALITY)
        {
            S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
            /* Intra cost biased by qstep * multiplier * 16; the '< 0' */
            /* test guards against S32 overflow of the biased sum, in */
            /* which case the intra cost is clamped to MAX_32BIT_VAL */
            S32 best_intra_cost =
                ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
                  ps_ctb_cluster_info->i4_frame_qstep *
                      ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
                    ? MAX_32BIT_VAL
                    : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
                       ps_ctb_cluster_info->i4_frame_qstep *
                           ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
            S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
            S32 cost_differential = (best_inter_cost - best_cost);

            /* Keep the 64x64 merge only if the inter cost is within */
            /* ALL_INTER_COST_DIFF_THR percent of the best cost */
            enable_64x64_merge =
                ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
        }
#endif
    }

    if(enable_64x64_merge)
    {
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
        if(e_quality_preset < ME_HIGH_QUALITY)
#endif
        {
            hme_boot_out_outlier(ps_ctb_cluster_info, 64);

            hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
        }

        /* Populate the depth-0 (64x64) node of the CU tree */
        hme_populate_cu_tree(
            ps_ctb_cluster_info,
            ps_ctb_cluster_info->ps_cu_tree_root,
            0,
            e_quality_preset,
            POS_NA,
            POS_NA,
            POS_NA);
    }
}
| #endif |
| |
| static __inline void hme_merge_prms_init( |
| hme_merge_prms_t *ps_prms, |
| layer_ctxt_t *ps_curr_layer, |
| refine_prms_t *ps_refine_prms, |
| me_frm_ctxt_t *ps_me_ctxt, |
| range_prms_t *ps_range_prms_rec, |
| range_prms_t *ps_range_prms_inp, |
| mv_grid_t **pps_mv_grid, |
| inter_ctb_prms_t *ps_inter_ctb_prms, |
| S32 i4_num_pred_dir, |
| S32 i4_32x32_id, |
| BLK_SIZE_T e_blk_size, |
| ME_QUALITY_PRESETS_T e_me_quality_presets) |
| { |
| S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel; |
| S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0; |
| |
| /* Currently not enabling segmentation info from prev layers */ |
| ps_prms->i4_seg_info_avail = 0; |
| ps_prms->i4_part_mask = 0; |
| |
| /* Number of reference pics in which to do merge */ |
| ps_prms->i4_num_ref = i4_num_pred_dir; |
| |
| /* Layer ctxt info */ |
| ps_prms->ps_layer_ctxt = ps_curr_layer; |
| |
| ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms; |
| |
| /* Top left, top right, bottom left and bottom right 16x16 units */ |
| if(BLK_32x32 == e_blk_size) |
| { |
| ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16]; |
| ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1]; |
| ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2]; |
| ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3]; |
| |
| /* Merge results stored here */ |
| ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id]; |
| |
| /* This could be lesser than the number of 16x16results generated*/ |
| /* For now, keeping it to be same */ |
| ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results; |
| ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4]; |
| ps_prms->ps_results_grandchild = NULL; |
| } |
| else |
| { |
| ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0]; |
| ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1]; |
| ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2]; |
| ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3]; |
| |
| /* Merge results stored here */ |
| ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64; |
| |
| ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results; |
| ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0]; |
| ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16; |
| } |
| |
| if(i4_use_rec) |
| { |
| WORD32 ref_ctr; |
| |
| for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) |
| { |
| ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr]; |
| } |
| } |
| else |
| { |
| WORD32 ref_ctr; |
| |
| for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) |
| { |
| ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr]; |
| } |
| } |
| ps_prms->i4_use_rec = i4_use_rec; |
| |
| ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; |
| |
| ps_prms->pps_mv_grid = pps_mv_grid; |
| |
| ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size; |
| |
| ps_prms->e_quality_preset = e_me_quality_presets; |
| ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list; |
| ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list; |
| ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info; |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_refine(me_ctxt_t *ps_ctxt, |
| * refine_layer_prms_t *ps_refine_prms) |
| * |
| * @brief Top level entry point for refinement ME |
| * |
| * @param[in,out] ps_ctxt: ME Handle |
| * |
| * @param[in] ps_refine_prms : refinement layer prms |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_refine( |
| me_ctxt_t *ps_thrd_ctxt, |
| refine_prms_t *ps_refine_prms, |
| PF_EXT_UPDATE_FXN_T pf_ext_update_fxn, |
| layer_ctxt_t *ps_coarse_layer, |
| multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
| S32 lyr_job_type, |
| S32 thrd_id, |
| S32 me_frm_id, |
| pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input) |
| { |
| inter_ctb_prms_t s_common_frm_prms; |
| |
| BLK_SIZE_T e_search_blk_size, e_result_blk_size; |
| WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL; |
| me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id]; |
| ME_QUALITY_PRESETS_T e_me_quality_presets = |
| ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; |
| |
| WORD32 num_rows_proc = 0; |
| WORD32 num_act_ref_pics; |
| WORD16 i2_prev_enc_frm_max_mv_y; |
| WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p; |
| |
| /*************************************************************************/ |
| /* Complexity of search: Low to High */ |
| /*************************************************************************/ |
| SEARCH_COMPLEXITY_T e_search_complexity; |
| |
| /*************************************************************************/ |
| /* to store the PU results which are passed to the decide_part_types */ |
| /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/ |
| /*************************************************************************/ |
| |
| pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST]; |
| inter_pu_results_t as_inter_pu_results[4]; |
| inter_pu_results_t *ps_pu_results = as_inter_pu_results; |
| |
| /*************************************************************************/ |
| /* Config parameter structures for varius ME submodules */ |
| /*************************************************************************/ |
| hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr; |
| hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br; |
| hme_merge_prms_t s_merge_prms_64x64; |
| hme_search_prms_t s_search_prms_blk; |
| mvbank_update_prms_t s_mv_update_prms; |
| hme_ctb_prms_t s_ctb_prms; |
| hme_subpel_prms_t s_subpel_prms; |
| fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt; |
| ctb_cluster_info_t *ps_ctb_cluster_info; |
| fpel_srch_cand_init_data_t s_srch_cand_init_data; |
| |
| /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */ |
| S32 en_merge_32x32; |
| /* 5 lsb's specify whether or not merge algorithm is required */ |
| /* to be executed or not. Relevant only in PQ. Ought to be */ |
| /* used in conjunction with en_merge_32x32 and */ |
| /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */ |
| /* required when all children are deemed to be intras */ |
| S32 en_merge_execution; |
| |
| /*************************************************************************/ |
| /* All types of search candidates for predictor based search. */ |
| /*************************************************************************/ |
| S32 num_init_candts = 0; |
| S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; |
| S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
| search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS]; |
| search_node_t as_top_neighbours[4], as_left_neighbours[3]; |
| |
| pf_get_wt_inp fp_get_wt_inp; |
| |
| search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9]; |
| U32 au4_unique_node_map[MAP_X_MAX * 2]; |
| |
| /* Controls the boundary attributes of CTB, whether it has 64x64 or not */ |
| ctb_boundary_attrs_t *ps_ctb_bound_attrs; |
| |
| /*************************************************************************/ |
| /* points ot the search results for the blk level search (8x8/16x16) */ |
| /*************************************************************************/ |
| search_results_t *ps_search_results; |
| |
| /*************************************************************************/ |
| /* Coordinates */ |
| /*************************************************************************/ |
| S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb; |
| S32 pos_x, pos_y; |
| S32 blk_id_in_full_ctb; |
| |
| /*************************************************************************/ |
| /* Related to dimensions of block being searched and pic dimensions */ |
| /*************************************************************************/ |
| S32 blk_4x4_to_16x16; |
| S32 blk_wd, blk_ht, blk_size_shift; |
| S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb; |
| S32 num_results_prev_layer; |
| |
| /*************************************************************************/ |
| /* Size of a basic unit for this layer. For non encode layers, we search */ |
| /* in block sizes of 8x8. For encode layers, though we search 16x16s the */ |
| /* basic unit size is the ctb size. */ |
| /*************************************************************************/ |
| S32 unit_size; |
| |
| /*************************************************************************/ |
| /* Local variable storing results of any 4 CU merge to bigger CU */ |
| /*************************************************************************/ |
| CU_MERGE_RESULT_T e_merge_result; |
| |
| /*************************************************************************/ |
| /* This mv grid stores results during and after fpel search, during */ |
| /* merge, subpel and bidirect refinements stages. 2 instances of this are*/ |
| /* meant for the 2 directions of search (l0 and l1). */ |
| /*************************************************************************/ |
| mv_grid_t *aps_mv_grid[2]; |
| |
| /*************************************************************************/ |
| /* Pointers to context in current and coarser layers */ |
| /*************************************************************************/ |
| layer_ctxt_t *ps_curr_layer, *ps_prev_layer; |
| |
| /*************************************************************************/ |
| /* to store mv range per blk, and picture limit, allowed search range */ |
| /* range prms in hpel and qpel units as well */ |
| /*************************************************************************/ |
| range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF]; |
| range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF]; |
| range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF]; |
| |
| /*************************************************************************/ |
| /* These variables are used to track number of references at different */ |
| /* stages of ME. */ |
| /*************************************************************************/ |
| S32 i4_num_pred_dir; |
| S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer; |
| S32 lambda_recon = ps_refine_prms->lambda_recon; |
| |
| /* Counts successful merge to 32x32 every CTB (0-4) */ |
| S32 merge_count_32x32; |
| |
| S32 ai4_id_coloc[14], ai4_id_Z[2]; |
| U08 au1_search_candidate_list_index[2]; |
| S32 ai4_num_coloc_cands[2]; |
| U08 u1_pred_dir, u1_pred_dir_ctr; |
| |
| /*************************************************************************/ |
| /* Input pointer and stride */ |
| /*************************************************************************/ |
| U08 *pu1_inp; |
| S32 i4_inp_stride; |
| S32 end_of_frame; |
| S32 num_sync_units_in_row, num_sync_units_in_tile; |
| |
| /*************************************************************************/ |
| /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/ |
| /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/ |
| /* we need to stop merges and force 8x8 CUs for that 16x16 blk */ |
| /*************************************************************************/ |
| S32 blk_8x8_mask; |
| S32 ai4_blk_8x8_mask[16]; |
| U08 au1_is_64x64Blk_noisy[1]; |
| U08 au1_is_32x32Blk_noisy[4]; |
| U08 au1_is_16x16Blk_noisy[16]; |
| |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list = |
| ps_thrd_ctxt->ps_cmn_utils_optimised_function_list; |
| ihevce_me_optimised_function_list_t *ps_me_optimised_function_list = |
| ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list); |
| |
| ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1); |
| |
| /*************************************************************************/ |
| /* Pointers to current and coarse layer are needed for projection */ |
| /* Pointer to prev layer are needed for other candts like coloc */ |
| /*************************************************************************/ |
| ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id]; |
| |
| ps_prev_layer = hme_get_past_layer_ctxt( |
| ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel); |
| |
| num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref; |
| |
| /* Function pointer is selected based on the C vc X86 macro */ |
| |
| fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb; |
| |
| i4_inp_stride = ps_curr_layer->i4_inp_stride; |
| i4_pic_wd = ps_curr_layer->i4_wd; |
| i4_pic_ht = ps_curr_layer->i4_ht; |
| e_search_complexity = ps_refine_prms->e_search_complexity; |
| end_of_frame = 0; |
| |
| /* This points to all the initial candts */ |
| ps_search_candts = &as_search_candts[0]; |
| |
| /* mv grid being huge strucutre is part of context */ |
| aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0]; |
| aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1]; |
| |
| /*************************************************************************/ |
| /* If the current layer is encoded (since it may be multicast or final */ |
| /* layer (finest)), then we use 16x16 blk size with some selected parts */ |
| /* If the current layer is not encoded, then we use 8x8 blk size, with */ |
| /* enable or disable of 4x4 partitions depending on the input prms */ |
| /*************************************************************************/ |
| e_search_blk_size = BLK_16x16; |
| blk_wd = blk_ht = 16; |
| blk_size_shift = 4; |
| e_result_blk_size = BLK_8x8; |
| s_mv_update_prms.i4_shift = 1; |
| |
| if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4) |
| { |
| blk_4x4_to_16x16 = 1; |
| } |
| else |
| { |
| blk_4x4_to_16x16 = 0; |
| } |
| |
| unit_size = 1 << ps_ctxt->log_ctb_size; |
| s_search_prms_blk.i4_inp_stride = unit_size; |
| |
| /* This is required to properly update the layer mv bank */ |
| s_mv_update_prms.e_search_blk_size = e_search_blk_size; |
| s_search_prms_blk.e_blk_size = e_search_blk_size; |
| |
| /*************************************************************************/ |
| /* If current layer is explicit, then the number of ref frames are to */ |
| /* be same as previous layer. Else it will be 2 */ |
| /*************************************************************************/ |
| i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; |
| i4_num_pred_dir = |
| (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) + |
| 1; |
| |
| #if USE_MODIFIED == 1 |
| s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; |
| #else |
| s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; |
| #endif |
| |
| i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer); |
| if(i4_num_ref_prev_layer <= 2) |
| { |
| i4_num_ref_each_dir = 1; |
| } |
| else |
| { |
| i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1; |
| } |
| |
| s_mv_update_prms.i4_num_ref = i4_num_pred_dir; |
| s_mv_update_prms.i4_num_results_to_store = |
| MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref |
| : (i4_num_act_ref_l0 > 1) + 1, |
| ps_refine_prms->i4_num_results_per_part); |
| |
| /*************************************************************************/ |
| /* Initialization of merge params for 16x16 to 32x32 merge. */ |
| /* There are 4 32x32 units in a CTB, so 4 param structures initialized */ |
| /*************************************************************************/ |
| { |
| hme_merge_prms_t *aps_merge_prms[4]; |
| aps_merge_prms[0] = &s_merge_prms_32x32_tl; |
| aps_merge_prms[1] = &s_merge_prms_32x32_tr; |
| aps_merge_prms[2] = &s_merge_prms_32x32_bl; |
| aps_merge_prms[3] = &s_merge_prms_32x32_br; |
| for(i = 0; i < 4; i++) |
| { |
| hme_merge_prms_init( |
| aps_merge_prms[i], |
| ps_curr_layer, |
| ps_refine_prms, |
| ps_ctxt, |
| as_range_prms_rec, |
| as_range_prms_inp, |
| &aps_mv_grid[0], |
| &s_common_frm_prms, |
| i4_num_pred_dir, |
| i, |
| BLK_32x32, |
| e_me_quality_presets); |
| } |
| } |
| |
| /*************************************************************************/ |
| /* Initialization of merge params for 32x32 to 64x64 merge. */ |
| /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */ |
| /*************************************************************************/ |
| { |
| hme_merge_prms_init( |
| &s_merge_prms_64x64, |
| ps_curr_layer, |
| ps_refine_prms, |
| ps_ctxt, |
| as_range_prms_rec, |
| as_range_prms_inp, |
| &aps_mv_grid[0], |
| &s_common_frm_prms, |
| i4_num_pred_dir, |
| 0, |
| BLK_64x64, |
| e_me_quality_presets); |
| } |
| |
| /* Pointers to cu_results are initialised here */ |
| { |
| WORD32 i; |
| |
| ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results; |
| |
| for(i = 0; i < 4; i++) |
| { |
| ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i]; |
| } |
| |
| for(i = 0; i < 16; i++) |
| { |
| ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i]; |
| } |
| } |
| |
| /*************************************************************************/ |
| /* SUBPEL Params initialized here */ |
| /*************************************************************************/ |
| { |
| s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0]; |
| s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0]; |
| s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64; |
| |
| s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results; |
| s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results; |
| s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results; |
| |
| s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine; |
| s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine; |
| |
| s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel; |
| |
| s_subpel_prms.i4_inp_stride = unit_size; |
| |
| s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N; |
| s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN; |
| s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold; |
| |
| s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic; |
| |
| { |
| WORD32 ref_ctr; |
| for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) |
| { |
| s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr]; |
| s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr]; |
| } |
| } |
| s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck; |
| |
| #if USE_MODIFIED == 0 |
| s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; |
| #else |
| s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; |
| #endif |
| s_subpel_prms.e_me_quality_presets = e_me_quality_presets; |
| |
| /* BI Refinement done only if this field is 1 */ |
| s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled; |
| |
| s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past; |
| |
| s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; |
| s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
| s_subpel_prms.u1_max_num_subpel_refine_centers = |
| ps_refine_prms->u1_max_num_subpel_refine_centers; |
| } |
| |
| /* inter_ctb_prms_t struct initialisation */ |
| { |
| inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms; |
| hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms; |
| |
| ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0; |
| ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1; |
| ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc; |
| ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth; |
| ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets; |
| ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled; |
| ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride; |
| ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref; |
| ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd; |
| ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride; |
| ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; |
| ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
| ps_inter_ctb_prms->i4_lamda = lambda_recon; |
| ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift; |
| ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8; |
| ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt; |
| ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list; |
| ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list; |
| ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance; |
| ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands = |
| ps_refine_prms->u1_max_2nx2n_tu_recur_cands; |
| } |
| |
| for(i = 0; i < MAX_INIT_CANDTS; i++) |
| { |
| ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; |
| ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i]; |
| |
| INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0); |
| } |
| num_act_ref_pics = |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
| |
| if(num_act_ref_pics) |
| { |
| hme_search_cand_data_init( |
| ai4_id_Z, |
| ai4_id_coloc, |
| ai4_num_coloc_cands, |
| au1_search_candidate_list_index, |
| i4_num_act_ref_l0, |
| i4_num_act_ref_l1, |
| ps_ctxt->s_frm_prms.bidir_enabled, |
| blk_4x4_to_16x16); |
| } |
| |
| if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1)) |
| { |
| ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0]; |
| ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1]; |
| } |
| else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1)) |
| { |
| ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0]; |
| } |
| |
| for(i = 0; i < 3; i++) |
| { |
| search_node_t *ps_search_node; |
| ps_search_node = &as_left_neighbours[i]; |
| INIT_SEARCH_NODE(ps_search_node, 0); |
| ps_search_node = &as_top_neighbours[i]; |
| INIT_SEARCH_NODE(ps_search_node, 0); |
| } |
| |
| INIT_SEARCH_NODE(&as_top_neighbours[3], 0); |
| as_left_neighbours[2].u1_is_avail = 0; |
| |
| /*************************************************************************/ |
| /* Initialize all the search results structure here. We update all the */ |
| /* search results to default values, and configure things like blk sizes */ |
| /*************************************************************************/ |
| if(num_act_ref_pics) |
| { |
| S32 i4_x, i4_y; |
| /* 16x16 results */ |
| for(i = 0; i < 16; i++) |
| { |
| search_results_t *ps_search_results; |
| S32 pred_lx; |
| ps_search_results = &ps_ctxt->as_search_results_16x16[i]; |
| i4_x = (S32)gau1_encode_to_raster_x[i]; |
| i4_y = (S32)gau1_encode_to_raster_y[i]; |
| i4_x <<= 4; |
| i4_y <<= 4; |
| |
| hme_init_search_results( |
| ps_search_results, |
| i4_num_pred_dir, |
| ps_refine_prms->i4_num_fpel_results, |
| ps_refine_prms->i4_num_results_per_part, |
| e_search_blk_size, |
| i4_x, |
| i4_y, |
| &ps_ctxt->au1_is_past[0]); |
| |
| for(pred_lx = 0; pred_lx < 2; pred_lx++) |
| { |
| pred_ctxt_t *ps_pred_ctxt; |
| |
| ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
| |
| hme_init_pred_ctxt_encode( |
| ps_pred_ctxt, |
| ps_search_results, |
| ps_search_candts[ai4_id_coloc[0]].ps_search_node, |
| ps_search_candts[ai4_id_Z[0]].ps_search_node, |
| aps_mv_grid[pred_lx], |
| pred_lx, |
| lambda_recon, |
| ps_refine_prms->lambda_q_shift, |
| &ps_ctxt->apu1_ref_bits_tlu_lc[0], |
| &ps_ctxt->ai2_ref_scf[0]); |
| } |
| } |
| |
| for(i = 0; i < 4; i++) |
| { |
| search_results_t *ps_search_results; |
| S32 pred_lx; |
| ps_search_results = &ps_ctxt->as_search_results_32x32[i]; |
| |
| i4_x = (S32)gau1_encode_to_raster_x[i]; |
| i4_y = (S32)gau1_encode_to_raster_y[i]; |
| i4_x <<= 5; |
| i4_y <<= 5; |
| |
| hme_init_search_results( |
| ps_search_results, |
| i4_num_pred_dir, |
| ps_refine_prms->i4_num_32x32_merge_results, |
| ps_refine_prms->i4_num_results_per_part, |
| BLK_32x32, |
| i4_x, |
| i4_y, |
| &ps_ctxt->au1_is_past[0]); |
| |
| for(pred_lx = 0; pred_lx < 2; pred_lx++) |
| { |
| pred_ctxt_t *ps_pred_ctxt; |
| |
| ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
| |
| hme_init_pred_ctxt_encode( |
| ps_pred_ctxt, |
| ps_search_results, |
| ps_search_candts[ai4_id_coloc[0]].ps_search_node, |
| ps_search_candts[ai4_id_Z[0]].ps_search_node, |
| aps_mv_grid[pred_lx], |
| pred_lx, |
| lambda_recon, |
| ps_refine_prms->lambda_q_shift, |
| &ps_ctxt->apu1_ref_bits_tlu_lc[0], |
| &ps_ctxt->ai2_ref_scf[0]); |
| } |
| } |
| |
| { |
| search_results_t *ps_search_results; |
| S32 pred_lx; |
| ps_search_results = &ps_ctxt->s_search_results_64x64; |
| |
| hme_init_search_results( |
| ps_search_results, |
| i4_num_pred_dir, |
| ps_refine_prms->i4_num_64x64_merge_results, |
| ps_refine_prms->i4_num_results_per_part, |
| BLK_64x64, |
| 0, |
| 0, |
| &ps_ctxt->au1_is_past[0]); |
| |
| for(pred_lx = 0; pred_lx < 2; pred_lx++) |
| { |
| pred_ctxt_t *ps_pred_ctxt; |
| |
| ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
| |
| hme_init_pred_ctxt_encode( |
| ps_pred_ctxt, |
| ps_search_results, |
| ps_search_candts[ai4_id_coloc[0]].ps_search_node, |
| ps_search_candts[ai4_id_Z[0]].ps_search_node, |
| aps_mv_grid[pred_lx], |
| pred_lx, |
| lambda_recon, |
| ps_refine_prms->lambda_q_shift, |
| &ps_ctxt->apu1_ref_bits_tlu_lc[0], |
| &ps_ctxt->ai2_ref_scf[0]); |
| } |
| } |
| } |
| |
| /* Initialise the structure used in clustering */ |
| if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info; |
| |
| ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16; |
| ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32; |
| ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64; |
| ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask; |
| ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold; |
| ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep; |
| ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16; |
| } |
| |
| /*********************************************************************/ |
| /* Initialize the dyn. search range params. for each reference index */ |
| /* in current layer ctxt */ |
| /*********************************************************************/ |
| |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| WORD32 ref_ctr; |
| /* set no. of act ref in L0 for further use at frame level */ |
| ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 = |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0; |
| |
| for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++) |
| { |
| INIT_DYN_SEARCH_PRMS( |
| &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr], |
| ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]); |
| } |
| } |
| /*************************************************************************/ |
| /* Now that the candidates have been ordered, to choose the right number */ |
| /* of initial candidates. */ |
| /*************************************************************************/ |
| if(blk_4x4_to_16x16) |
| { |
| if(i4_num_ref_prev_layer > 2) |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else |
| ASSERT(0); |
| } |
| else if(i4_num_ref_prev_layer == 2) |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else |
| ASSERT(0); |
| } |
| else |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 5; |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 12; |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 19; |
| else |
| ASSERT(0); |
| } |
| } |
| else |
| { |
| if(i4_num_ref_prev_layer > 2) |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else |
| ASSERT(0); |
| } |
| else if(i4_num_ref_prev_layer == 2) |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1); |
| else |
| ASSERT(0); |
| } |
| else |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 5; |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 11; |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 16; |
| else |
| ASSERT(0); |
| } |
| } |
| |
| /*************************************************************************/ |
| /* The following search parameters are fixed throughout the search across*/ |
| /* all blks. So these are configured outside processing loop */ |
| /*************************************************************************/ |
| s_search_prms_blk.i4_num_init_candts = num_init_candts; |
| s_search_prms_blk.i4_start_step = 1; |
| s_search_prms_blk.i4_use_satd = 0; |
| s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel; |
| /* we use recon only for encoded layers, otherwise it is not available */ |
| s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel; |
| |
| s_search_prms_blk.ps_search_candts = ps_search_candts; |
| if(s_search_prms_blk.i4_use_rec) |
| { |
| WORD32 ref_ctr; |
| for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) |
| s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr]; |
| } |
| else |
| { |
| WORD32 ref_ctr; |
| for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++) |
| s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr]; |
| } |
| |
| /*************************************************************************/ |
| /* Initialize coordinates. Meaning as follows */ |
| /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */ |
| /* blk_y : same as above, y coord. */ |
| /* num_blks_in_this_ctb : number of blks in this given ctb that starts */ |
| /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */ |
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
| /* corner of the picture. Always multiple of 64. */ |
| /* blk_id_in_ctb : encode order id of the blk in the ctb. */ |
| /*************************************************************************/ |
| blk_y = 0; |
| blk_id_in_ctb = 0; |
| i4_ctb_y = 0; |
| |
| /*************************************************************************/ |
| /* Picture limit on all 4 sides. This will be used to set mv limits for */ |
    /* every block given its coordinate. Note this assumes that the min amt */
| /* of padding to right of pic is equal to the blk size. If we go all the */ |
| /* way upto 64x64, then the min padding on right size of picture should */ |
| /* be 64, and also on bottom side of picture. */ |
| /*************************************************************************/ |
| SET_PIC_LIMIT( |
| s_pic_limit_inp, |
| ps_curr_layer->i4_pad_x_rec, |
| ps_curr_layer->i4_pad_y_rec, |
| ps_curr_layer->i4_wd, |
| ps_curr_layer->i4_ht, |
| s_search_prms_blk.i4_num_steps_post_refine); |
| |
| SET_PIC_LIMIT( |
| s_pic_limit_rec, |
| ps_curr_layer->i4_pad_x_rec, |
| ps_curr_layer->i4_pad_y_rec, |
| ps_curr_layer->i4_wd, |
| ps_curr_layer->i4_ht, |
| s_search_prms_blk.i4_num_steps_post_refine); |
| |
| /*************************************************************************/ |
| /* set the MV limit per ref. pic. */ |
| /* - P pic. : Based on the config params. */ |
| /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */ |
| /*************************************************************************/ |
| hme_set_mv_limit_using_dvsr_data( |
| ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics); |
| s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands; |
| s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; |
| s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
| s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer; |
| s_srch_cand_init_data.ps_curr_layer = ps_curr_layer; |
| s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts; |
| s_srch_cand_init_data.ps_search_cands = ps_search_candts; |
| s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store; |
| s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0; |
| s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1; |
| s_srch_cand_init_data.e_search_blk_size = e_search_blk_size; |
| |
| while(0 == end_of_frame) |
| { |
| job_queue_t *ps_job; |
| frm_ctb_ctxt_t *ps_frm_ctb_prms; |
| ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; |
| |
| WORD32 i4_max_mv_x_in_ctb; |
| WORD32 i4_max_mv_y_in_ctb; |
| void *pv_dep_mngr_encloop_dep_me; |
| WORD32 offset_val, check_dep_pos, set_dep_pos; |
| WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0; |
| |
| pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me; |
| |
| ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms; |
| |
| /* Get the current row from the job queue */ |
| ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job( |
| ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id); |
| |
| /* If all rows are done, set the end of process flag to 1, */ |
| /* and the current row to -1 */ |
| if(NULL == ps_job) |
| { |
| blk_y = -1; |
| i4_ctb_y = -1; |
| tile_col_idx = -1; |
| end_of_frame = 1; |
| |
| continue; |
| } |
| |
| /* set the output dependency after picking up the row */ |
| ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id); |
| |
| /* Obtain the current row's details from the job */ |
| { |
| ihevce_tile_params_t *ps_col_tile_params; |
| |
| i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; |
| /* Obtain the current colum tile index from the job */ |
| tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx; |
| |
            /* in the encode layer, blocks are 16x16 and the CTB is 64 x 64 */
            /* note: if the ctb is 32x32 then this calc needs to be changed */
| num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >> |
| ps_ctxt->log_ctb_size; |
| |
| /* The tile parameter for the col. idx. Use only the properties |
| which is same for all the bottom tiles like width, start_x, etc. |
| Don't use height, start_y, etc. */ |
| ps_col_tile_params = |
| ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx); |
            /* in the encode layer, blocks are 16x16 and the CTB is 64 x 64 */
            /* note: if the ctb is 32x32 then this calc needs to be changed */
| num_sync_units_in_tile = |
| (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >> |
| ps_ctxt->log_ctb_size; |
| |
| i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x; |
| i4_ctb_x = i4_first_ctb_x; |
| |
| if(!num_act_ref_pics) |
| { |
| for(i4_ctb_x = i4_first_ctb_x; |
| i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile); |
| i4_ctb_x++) |
| { |
| S32 blk_i = 0, blk_j = 0; |
| /* set the dependency for the corresponding row in enc loop */ |
| ihevce_dmgr_set_row_row_sync( |
| pv_dep_mngr_encloop_dep_me, |
| (i4_ctb_x + 1), |
| i4_ctb_y, |
| tile_col_idx /* Col Tile No. */); |
| } |
| |
| continue; |
| } |
| |
| /* increment the number of rows proc */ |
| num_rows_proc++; |
| |
| /* Set Variables for Dep. Checking and Setting */ |
| set_dep_pos = i4_ctb_y + 1; |
| if(i4_ctb_y > 0) |
| { |
| offset_val = 2; |
| check_dep_pos = i4_ctb_y - 1; |
| } |
| else |
| { |
| /* First row should run without waiting */ |
| offset_val = -1; |
| check_dep_pos = 0; |
| } |
| |
| /* row ctb out pointer */ |
| ps_ctxt->ps_ctb_analyse_curr_row = |
| ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; |
| |
| /* Row level CU Tree buffer */ |
| ps_ctxt->ps_cu_tree_curr_row = |
| ps_ctxt->ps_cu_tree_base + |
| i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE; |
| |
| ps_ctxt->ps_me_ctb_data_curr_row = |
| ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; |
| } |
| |
| /* This flag says the CTB under processing is at the start of tile in horz dir.*/ |
| left_ctb_in_diff_tile = 1; |
| |
| /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */ |
        /* the shift value will be passed on to the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
| { |
| S32 i4_ref_id, i4_bits_req; |
| |
| for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l1); |
| i4_ref_id++) |
| { |
| GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]); |
| |
| if(i4_bits_req > 12) |
| { |
| ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12); |
| } |
| else |
| { |
| ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0; |
| } |
| } |
| |
| s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val; |
| } |
| |
| /* if non-encode layer then i4_ctb_x will be same as blk_x */ |
        /* loop over all the units in a row */
| for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile); |
| i4_ctb_x++) |
| { |
| ihevce_ctb_noise_params *ps_ctb_noise_params = |
| &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params; |
| |
| s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6; |
| s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6; |
| |
| ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6; |
| ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6; |
| /* Initialize ptr to current IPE CTB */ |
| ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + |
| i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz; |
| { |
| ps_ctb_bound_attrs = |
| get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt); |
| |
| en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag; |
| num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb; |
| } |
| |
| /* Block to initialise pointers to part_type_results_t */ |
| /* in each size-specific inter_cu_results_t */ |
| { |
| WORD32 i; |
| |
| for(i = 0; i < 64; i++) |
| { |
| ps_ctxt->as_cu8x8_results[i].ps_best_results = |
| ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x] |
| .as_8x8_block_data[i] |
| .as_best_results; |
| ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0; |
| } |
| |
| for(i = 0; i < 16; i++) |
| { |
| ps_ctxt->as_cu16x16_results[i].ps_best_results = |
| ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results; |
| ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0; |
| } |
| |
| for(i = 0; i < 4; i++) |
| { |
| ps_ctxt->as_cu32x32_results[i].ps_best_results = |
| ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x] |
| .as_32x32_block_data[i] |
| .as_best_results; |
| ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0; |
| } |
| |
| ps_ctxt->s_cu64x64_results.ps_best_results = |
| ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results; |
| ps_ctxt->s_cu64x64_results.u1_num_best_results = 0; |
| } |
| |
| if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32; |
| ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb; |
| ps_ctb_cluster_info->ps_cu_tree_root = |
| ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE); |
| ps_ctb_cluster_info->nodes_created_in_cu_tree = 1; |
| } |
| |
| if(ME_PRISTINE_QUALITY != e_me_quality_presets) |
| { |
| S32 i4_nodes_created_in_cu_tree = 1; |
| |
| ihevce_cu_tree_init( |
| (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)), |
| (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)), |
| &i4_nodes_created_in_cu_tree, |
| 0, |
| POS_NA, |
| POS_NA, |
| POS_NA); |
| } |
| |
| memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32)); |
| |
| if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb) |
| { |
| S32 j; |
| |
| ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb; |
| |
| ps_cur_ipe_ctb = |
| ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row; |
| lambda_recon = |
| hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb); |
| |
| lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f); |
| |
| for(i = 0; i < 4; i++) |
| { |
| ps_search_results = &ps_ctxt->as_search_results_32x32[i]; |
| |
| for(j = 0; j < 2; j++) |
| { |
| ps_search_results->as_pred_ctxt[j].lambda = lambda_recon; |
| } |
| } |
| ps_search_results = &ps_ctxt->s_search_results_64x64; |
| |
| for(j = 0; j < 2; j++) |
| { |
| ps_search_results->as_pred_ctxt[j].lambda = lambda_recon; |
| } |
| |
| s_common_frm_prms.i4_lamda = lambda_recon; |
| } |
| else |
| { |
| lambda_recon = ps_refine_prms->lambda_recon; |
| } |
| |
| /*********************************************************************/ |
| /* replicate the inp buffer at blk or ctb level for each ref id, */ |
| /* Instead of searching with wk * ref(k), we search with Ik = I / wk */ |
| /* thereby avoiding a bloat up of memory. If we did all references */ |
| /* weighted pred, we will end up with a duplicate copy of each ref */ |
| /* at each layer, since we need to preserve the original reference. */ |
| /* ToDo: Need to observe performance with this mechanism and compare */ |
| /* with case where ref is weighted. */ |
| /*********************************************************************/ |
| fp_get_wt_inp( |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| unit_size, |
| s_common_frm_prms.i4_ctb_x_off, |
| s_common_frm_prms.i4_ctb_y_off, |
| unit_size, |
| ps_ctxt->num_ref_future + ps_ctxt->num_ref_past, |
| ps_ctxt->i4_wt_pred_enable_flag); |
| |
| if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled) |
| { |
| #if TEMPORAL_NOISE_DETECT |
| { |
| WORD32 had_block_size = 16; |
| WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) |
| ? 64 |
| : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off; |
| WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) |
| ? 64 |
| : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off; |
| WORD32 num_pred_dir = i4_num_pred_dir; |
| WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off; |
| WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off; |
| |
| WORD32 i; |
| WORD32 noise_detected; |
| WORD32 ctb_size; |
| WORD32 num_comp_had_blocks; |
| WORD32 noisy_block_cnt; |
| WORD32 index_8x8_block; |
| WORD32 num_8x8_in_ctb_row; |
| |
| WORD32 ht_offset; |
| WORD32 wd_offset; |
| WORD32 block_ht; |
| WORD32 block_wd; |
| |
| WORD32 num_horz_blocks; |
| WORD32 num_vert_blocks; |
| |
| WORD32 mean; |
| UWORD32 variance_8x8; |
| |
| WORD32 hh_energy_percent; |
| |
| /* variables to hold the constant values. The variable values held are decided by the HAD block size */ |
| WORD32 min_noisy_block_cnt; |
| WORD32 min_coeffs_above_avg; |
| WORD32 min_coeff_avg_energy; |
| |
                    /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on */
| WORD32 i4_cu_x_off, i4_cu_y_off; |
| WORD32 is_noisy; |
| |
                    /* initialise the variables holding the constants */
| if(had_block_size == 8) |
| { |
| min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;// |
| min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8; |
| min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8; |
| } |
| else |
| { |
| min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;// |
| min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16; |
| min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16; |
| } |
| |
| /* initialize the variables */ |
| noise_detected = 0; |
| noisy_block_cnt = 0; |
| hh_energy_percent = 0; |
| variance_8x8 = 0; |
| block_ht = ctb_height; |
| block_wd = ctb_width; |
| |
| mean = 0; |
| |
| ctb_size = block_ht * block_wd; //ctb_width * ctb_height; |
| num_comp_had_blocks = ctb_size / (had_block_size * had_block_size); |
| |
| num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size; |
| num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size; |
| |
| ht_offset = -had_block_size; |
| wd_offset = -had_block_size; |
| |
| num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb |
| for(i = 0; i < num_comp_had_blocks; i++) |
| { |
| if(i % num_horz_blocks == 0) |
| { |
| wd_offset = -had_block_size; |
| ht_offset += had_block_size; |
| } |
| wd_offset += had_block_size; |
| |
| /* CU level offsets */ |
| i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16 |
| i4_cu_y_off = i4_y_off + (i / 4) * 16; |
| |
| /* if 50 % or more of the CU is noisy then the return value is 1 */ |
| is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| ps_ctb_noise_params->au1_is_8x8Blk_noisy, |
| (i % 4) * 16, |
| (i / 4) * 16, |
| 16); |
| |
| /* only if the CU is noisy then check the temporal noise detect call is made on the CU */ |
| if(is_noisy) |
| { |
| index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row + |
| (i % num_horz_blocks) * 2; |
| noisy_block_cnt += ihevce_16x16block_temporal_noise_detect( |
| 16, |
| ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) |
| ? 64 |
| : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off, |
| ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) |
| ? 64 |
| : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off, |
| ps_ctb_noise_params, |
| &s_srch_cand_init_data, |
| &s_search_prms_blk, |
| ps_ctxt, |
| num_pred_dir, |
| i4_num_act_ref_l0, |
| i4_num_act_ref_l1, |
| i4_cu_x_off, |
| i4_cu_y_off, |
| &ps_ctxt->s_wt_pred, |
| unit_size, |
| index_8x8_block, |
| num_horz_blocks, |
| /*num_8x8_in_ctb_row*/ 8, // this should be a variable extra |
| i); |
| } /* if 16x16 is noisy */ |
| } /* loop over for all 16x16*/ |
| |
| if(noisy_block_cnt >= min_noisy_block_cnt) |
| { |
| noise_detected = 1; |
| } |
| |
| /* write back the noise presence detected for the current CTB to the structure */ |
| ps_ctb_noise_params->i4_noise_present = noise_detected; |
| } |
| #endif |
| |
| #if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME |
| if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled && |
| ps_ctb_noise_params->i4_noise_present) |
| { |
| memset( |
| ps_ctb_noise_params->au1_is_8x8Blk_noisy, |
| 1, |
| sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy)); |
| } |
| #endif |
| |
| for(i = 0; i < 16; i++) |
| { |
| au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16); |
| } |
| |
| for(i = 0; i < 4; i++) |
| { |
| au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32); |
| } |
| |
| for(i = 0; i < 1; i++) |
| { |
| au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64); |
| } |
| |
| if(ps_ctxt->s_frm_prms.bidir_enabled && |
| (ps_ctxt->s_frm_prms.i4_temporal_layer_id <= |
| MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION)) |
| { |
| ps_ctb_noise_params->i4_noise_present = 0; |
| memset( |
| ps_ctb_noise_params->au1_is_8x8Blk_noisy, |
| 0, |
| sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy)); |
| } |
| |
| #if ME_LAMBDA_DISCOUNT_WHEN_NOISY |
| for(i = 0; i < 4; i++) |
| { |
| S32 j; |
| S32 lambda; |
| |
| if(au1_is_32x32Blk_noisy[i]) |
| { |
| lambda = lambda_recon; |
| lambda = |
| ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
| |
| ps_search_results = &ps_ctxt->as_search_results_32x32[i]; |
| |
| for(j = 0; j < 2; j++) |
| { |
| ps_search_results->as_pred_ctxt[j].lambda = lambda; |
| } |
| } |
| } |
| |
| { |
| S32 j; |
| S32 lambda; |
| |
| if(au1_is_64x64Blk_noisy[0]) |
| { |
| lambda = lambda_recon; |
| lambda = |
| ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
| |
| ps_search_results = &ps_ctxt->s_search_results_64x64; |
| |
| for(j = 0; j < 2; j++) |
| { |
| ps_search_results->as_pred_ctxt[j].lambda = lambda; |
| } |
| } |
| } |
| #endif |
| if(au1_is_64x64Blk_noisy[0]) |
| { |
| U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + |
| (s_common_frm_prms.i4_ctb_y_off * |
| ps_curr_layer->i4_inp_stride)); |
| |
| hme_compute_sigmaX_and_sigmaXSquared( |
| pu1_inp, |
| ps_curr_layer->i4_inp_stride, |
| ps_ctxt->au4_4x4_src_sigmaX, |
| ps_ctxt->au4_4x4_src_sigmaXSquared, |
| 4, |
| 4, |
| 64, |
| 64, |
| 1, |
| 16); |
| } |
| else |
| { |
| for(i = 0; i < 4; i++) |
| { |
| if(au1_is_32x32Blk_noisy[i]) |
| { |
| U08 *pu1_inp = |
| ps_curr_layer->pu1_inp + |
| (s_common_frm_prms.i4_ctb_x_off + |
| (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride)); |
| |
| U08 u1_cu_size = 32; |
| WORD32 i4_inp_buf_offset = |
| (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) + |
| ((i % 2) * u1_cu_size)); |
| |
| U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128; |
| U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8; |
| S32 i4_sigma_arr_offset = |
| (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) + |
| ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb)); |
| |
| hme_compute_sigmaX_and_sigmaXSquared( |
| pu1_inp + i4_inp_buf_offset, |
| ps_curr_layer->i4_inp_stride, |
| ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset, |
| ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset, |
| 4, |
| 4, |
| 32, |
| 32, |
| 1, |
| 16); |
| } |
| else |
| { |
| S32 j; |
| |
| U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8; |
| U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2; |
| S32 i4_16x16_blk_start_index_in_i_th_32x32_blk = |
| (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) + |
| ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb)); |
| |
| for(j = 0; j < 4; j++) |
| { |
| U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4; |
| U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1; |
| S32 i4_16x16_blk_index_in_ctb = |
| i4_16x16_blk_start_index_in_i_th_32x32_blk + |
| ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) + |
| ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk); |
| |
| //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4); |
| |
| if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb]) |
| { |
| U08 *pu1_inp = |
| ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off + |
| (s_common_frm_prms.i4_ctb_y_off * |
| ps_curr_layer->i4_inp_stride)); |
| |
| U08 u1_cu_size = 16; |
| WORD32 i4_inp_buf_offset = |
| (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) + |
| ((i4_16x16_blk_index_in_ctb / 4) * |
| (u1_cu_size * ps_curr_layer->i4_inp_stride))); |
| |
| U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64; |
| U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4; |
| S32 i4_sigma_arr_offset = |
| (((i4_16x16_blk_index_in_ctb % 4) * |
| u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) + |
| ((i4_16x16_blk_index_in_ctb / 4) * |
| u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk)); |
| |
| hme_compute_sigmaX_and_sigmaXSquared( |
| pu1_inp + i4_inp_buf_offset, |
| ps_curr_layer->i4_inp_stride, |
| (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset), |
| (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset), |
| 4, |
| 4, |
| 16, |
| 16, |
| 1, |
| 16); |
| } |
| } |
| } |
| } |
| } |
| } |
| else |
| { |
| memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy)); |
| |
| memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy)); |
| |
| memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy)); |
| } |
| |
| for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++) |
| { |
| S32 ref_ctr; |
| U08 au1_pred_dir_searched[2]; |
| U08 u1_is_cu_noisy; |
| ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17]; |
| |
| { |
| blk_x = (i4_ctb_x << 2) + |
| (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x); |
| blk_y = (i4_ctb_y << 2) + |
| (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y); |
| |
| blk_id_in_full_ctb = |
| ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb; |
| blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask; |
| ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask; |
| s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6); |
| s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6); |
| } |
| |
| /* get the current input blk point */ |
| pos_x = blk_x << blk_size_shift; |
| pos_y = blk_y << blk_size_shift; |
| pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride); |
| |
| /*********************************************************************/ |
| /* For every blk in the picture, the search range needs to be derived*/ |
| /* Any blk can have any mv, but practical search constraints are */ |
| /* imposed by the picture boundary and amt of padding. */ |
| /*********************************************************************/ |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| if(!s_search_prms_blk.i4_use_rec) |
| { |
| hme_derive_search_range( |
| &as_range_prms_inp[ref_ctr], |
| &s_pic_limit_inp, |
| &as_mv_limit[ref_ctr], |
| pos_x, |
| pos_y, |
| blk_wd, |
| blk_ht); |
| } |
| else |
| { |
| hme_derive_search_range( |
| &as_range_prms_rec[ref_ctr], |
| &s_pic_limit_rec, |
| &as_mv_limit[ref_ctr], |
| pos_x, |
| pos_y, |
| blk_wd, |
| blk_ht); |
| } |
| } |
| s_search_prms_blk.i4_x_off = blk_x << blk_size_shift; |
| s_search_prms_blk.i4_y_off = blk_y << blk_size_shift; |
| /* Select search results from a suitable search result in the context */ |
| { |
| ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb]; |
| |
| if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb) |
| { |
| S32 i; |
| |
| for(i = 0; i < 2; i++) |
| { |
| ps_search_results->as_pred_ctxt[i].lambda = lambda_recon; |
| } |
| } |
| } |
| |
| u1_is_cu_noisy = au1_is_16x16Blk_noisy |
| [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)]; |
| |
| s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy; |
| |
| #if ME_LAMBDA_DISCOUNT_WHEN_NOISY |
| if(u1_is_cu_noisy) |
| { |
| S32 j; |
| S32 lambda; |
| |
| lambda = lambda_recon; |
| lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
| |
| for(j = 0; j < 2; j++) |
| { |
| ps_search_results->as_pred_ctxt[j].lambda = lambda; |
| } |
| } |
| else |
| { |
| S32 j; |
| S32 lambda; |
| |
| lambda = lambda_recon; |
| |
| for(j = 0; j < 2; j++) |
| { |
| ps_search_results->as_pred_ctxt[j].lambda = lambda; |
| } |
| } |
| #endif |
| |
| s_search_prms_blk.ps_search_results = ps_search_results; |
| |
| s_search_prms_blk.i4_part_mask = hme_part_mask_populator( |
| pu1_inp, |
| i4_inp_stride, |
| ps_refine_prms->limit_active_partitions, |
| ps_ctxt->ps_hme_frm_prms->bidir_enabled, |
| ps_ctxt->u1_is_curFrame_a_refFrame, |
| blk_8x8_mask, |
| e_me_quality_presets); |
| |
| if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] = |
| s_search_prms_blk.i4_part_mask; |
| } |
| |
| /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ |
| { |
| /* Setting u1_num_active_refs to 2 */ |
| /* for the sole purpose of the */ |
| /* function called below */ |
| ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1; |
| |
| hme_reset_search_results( |
| ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL); |
| |
| ps_search_results->u1_num_active_ref = i4_num_pred_dir; |
| } |
| |
| if(0 == blk_id_in_ctb) |
| { |
| UWORD8 u1_ctr; |
| for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l1); |
| u1_ctr++) |
| { |
| WORD32 i4_max_dep_ctb_y; |
| WORD32 i4_max_dep_ctb_x; |
| |
| /* Set max mv in ctb units */ |
| i4_max_mv_x_in_ctb = |
| (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >> |
| ps_ctxt->log_ctb_size; |
| |
| i4_max_mv_y_in_ctb = |
| (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >> |
| ps_ctxt->log_ctb_size; |
| /********************************************************************/ |
| /* Set max ctb_x and ctb_y dependency on reference picture */ |
| /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */ |
| /********************************************************************/ |
| i4_max_dep_ctb_x = CLIP3( |
| (i4_ctb_x + i4_max_mv_x_in_ctb + 1), |
| 0, |
| ps_frm_ctb_prms->i4_num_ctbs_horz - 1); |
| i4_max_dep_ctb_y = CLIP3( |
| (i4_ctb_y + i4_max_mv_y_in_ctb + 1), |
| 0, |
| ps_frm_ctb_prms->i4_num_ctbs_vert - 1); |
| |
| ihevce_dmgr_map_chk_sync( |
| ps_curr_layer->ppv_dep_mngr_recon[u1_ctr], |
| ps_ctxt->thrd_id, |
| i4_ctb_x, |
| i4_ctb_y, |
| i4_max_mv_x_in_ctb, |
| i4_max_mv_y_in_ctb); |
| } |
| } |
| |
| /* Loop across different Ref IDx */ |
| for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++) |
| { |
| S32 resultid; |
| S08 u1_default_ref_id; |
| S32 i4_num_srch_cands = 0; |
| S32 i4_num_refinement_iterations; |
| S32 i4_refine_iter_ctr; |
| |
| if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) || |
| (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)) |
| { |
| u1_pred_dir = u1_pred_dir_ctr; |
| } |
| else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0) |
| { |
| u1_pred_dir = 1; |
| } |
| |
| u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0] |
| : ps_ctxt->ai1_future_list[0]; |
| au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir; |
| |
| i4_num_srch_cands = 0; |
| resultid = 0; |
| |
| /* START OF NEW CTB MEANS FILL UP NEOGHBOURS IN 18x18 GRID */ |
| if(0 == blk_id_in_ctb) |
| { |
| /*****************************************************************/ |
| /* Initialize the mv grid with results of neighbours for the next*/ |
| /* ctb. */ |
| /*****************************************************************/ |
| hme_fill_ctb_neighbour_mvs( |
| ps_curr_layer, |
| blk_x, |
| blk_y, |
| aps_mv_grid[u1_pred_dir], |
| u1_pred_dir_ctr, |
| u1_default_ref_id, |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0); |
| } |
| |
| s_search_prms_blk.i1_ref_idx = u1_pred_dir; |
| |
| { |
| if((blk_id_in_full_ctb % 4) == 0) |
| { |
| ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2] |
| .as_pred_ctxt[u1_pred_dir] |
| .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1; |
| } |
| |
| if(blk_id_in_full_ctb == 0) |
| { |
| ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1; |
| } |
| |
| ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used = |
| !gau1_encode_to_raster_y[blk_id_in_full_ctb]; |
| } |
| |
| { |
| S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; |
| S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; |
| U08 u1_is_blk_at_ctb_boundary = !y; |
| |
| s_srch_cand_init_data.u1_is_left_available = |
| !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off); |
| |
| if(u1_is_blk_at_ctb_boundary) |
| { |
| s_srch_cand_init_data.u1_is_topRight_available = 0; |
| s_srch_cand_init_data.u1_is_topLeft_available = 0; |
| s_srch_cand_init_data.u1_is_top_available = 0; |
| } |
| else |
| { |
| s_srch_cand_init_data.u1_is_topRight_available = |
| gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd); |
| s_srch_cand_init_data.u1_is_top_available = 1; |
| s_srch_cand_init_data.u1_is_topLeft_available = |
| s_srch_cand_init_data.u1_is_left_available; |
| } |
| } |
| |
| s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id; |
| s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1]; |
| s_srch_cand_init_data.i4_pos_x = pos_x; |
| s_srch_cand_init_data.i4_pos_y = pos_y; |
| s_srch_cand_init_data.u1_pred_dir = u1_pred_dir; |
| s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr; |
| s_srch_cand_init_data.u1_search_candidate_list_index = |
| au1_search_candidate_list_index[u1_pred_dir]; |
| |
| i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data); |
| |
| /* Note this block also clips the MV range for all candidates */ |
| { |
| S08 i1_check_for_mult_refs; |
| |
| i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1) |
| : (ps_ctxt->num_ref_past > 1); |
| |
| ps_me_optimised_function_list->pf_mv_clipper( |
| &s_search_prms_blk, |
| i4_num_srch_cands, |
| i1_check_for_mult_refs, |
| ps_refine_prms->i4_num_steps_fpel_refine, |
| ps_refine_prms->i4_num_steps_hpel_refine, |
| ps_refine_prms->i4_num_steps_qpel_refine); |
| } |
| |
| #if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 |
| i4_num_refinement_iterations = |
| ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) |
| ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0) |
| : 1; |
| #else |
| i4_num_refinement_iterations = |
| ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1; |
| #endif |
| |
| #if ENABLE_EXPLICIT_SEARCH_IN_PQ |
| if(e_me_quality_presets == ME_PRISTINE_QUALITY) |
| { |
| i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0 |
| : i4_num_act_ref_l1; |
| } |
| #endif |
| |
| for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations; |
| i4_refine_iter_ctr++) |
| { |
| S32 center_x; |
| S32 center_y; |
| S32 center_ref_idx; |
| |
| S08 *pi1_pred_dir_to_ref_idx = |
| (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list; |
| |
| { |
| WORD32 i4_i; |
| |
| for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++) |
| { |
| ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; |
| ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL; |
| ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] = |
| MAX_SIGNED_16BIT_VAL; |
| ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0; |
| ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0; |
| ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id; |
| |
| if(ps_refine_prms->i4_num_results_per_part == 2) |
| { |
| ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = |
| MAX_SIGNED_16BIT_VAL; |
| ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = |
| MAX_SIGNED_16BIT_VAL; |
| ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] = |
| MAX_SIGNED_16BIT_VAL; |
| ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0; |
| ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0; |
| ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id; |
| } |
| } |
| |
| s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt; |
| s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt; |
| } |
| |
| { |
| search_node_t *ps_coloc_node; |
| |
| S32 i = 0; |
| |
| if(i4_num_refinement_iterations > 1) |
| { |
| for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++) |
| { |
| ps_coloc_node = |
| s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]] |
| .ps_search_node; |
| |
| if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] == |
| ps_coloc_node->i1_ref_idx) |
| { |
| break; |
| } |
| } |
| |
| if(i == ai4_num_coloc_cands[u1_pred_dir]) |
| { |
| i = 0; |
| } |
| } |
| else |
| { |
| ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]] |
| .ps_search_node; |
| } |
| |
| hme_set_mvp_node( |
| ps_search_results, |
| ps_coloc_node, |
| u1_pred_dir, |
| (i4_num_refinement_iterations > 1) |
| ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] |
| : u1_default_ref_id); |
| |
| center_x = ps_coloc_node->ps_mv->i2_mvx; |
| center_y = ps_coloc_node->ps_mv->i2_mvy; |
| center_ref_idx = ps_coloc_node->i1_ref_idx; |
| } |
| |
| /* Full-Pel search */ |
| { |
| S32 num_unique_nodes; |
| |
| memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map)); |
| |
| num_unique_nodes = hme_remove_duplicate_fpel_search_candidates( |
| as_unique_search_nodes, |
| s_search_prms_blk.ps_search_candts, |
| au4_unique_node_map, |
| pi1_pred_dir_to_ref_idx, |
| i4_num_srch_cands, |
| s_search_prms_blk.i4_num_init_candts, |
| i4_refine_iter_ctr, |
| i4_num_refinement_iterations, |
| i4_num_act_ref_l0, |
| center_ref_idx, |
| center_x, |
| center_y, |
| ps_ctxt->s_frm_prms.bidir_enabled, |
| e_me_quality_presets); |
| |
| /*************************************************************************/ |
| /* This array stores the ids of the partitions whose */ |
| /* SADs are updated. Since the partitions whose SADs are updated may not */ |
| /* be in contiguous order, we supply another level of indirection. */ |
| /*************************************************************************/ |
| ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids( |
| s_search_prms_blk.i4_part_mask, |
| &ps_fullpel_refine_ctxt->ai4_part_id[0]); |
| |
| if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy) |
| { |
| S32 i; |
| /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/ |
| S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) + |
| (s_search_prms_blk.i4_cu_y_off * 4); |
| |
| for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++) |
| { |
| S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i]; |
| |
| hme_compute_final_sigma_of_pu_from_base_blocks( |
| ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset, |
| ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset, |
| au8_final_src_sigmaX, |
| au8_final_src_sigmaXSquared, |
| 16, |
| 4, |
| i4_part_id, |
| 16); |
| } |
| |
| s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX; |
| s_common_frm_prms.pu8_part_src_sigmaXSquared = |
| au8_final_src_sigmaXSquared; |
| |
| s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX; |
| s_search_prms_blk.pu8_part_src_sigmaXSquared = |
| au8_final_src_sigmaXSquared; |
| } |
| |
| if(0 == num_unique_nodes) |
| { |
| continue; |
| } |
| |
| if(num_unique_nodes >= 2) |
| { |
| s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; |
| s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; |
| if(ps_ctxt->i4_pic_type != IV_P_FRAME) |
| { |
| if(ps_ctxt->i4_temporal_layer == 1) |
| { |
| hme_fullpel_cand_sifter( |
| &s_search_prms_blk, |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| ALPHA_FOR_NOISE_TERM_IN_ME, |
| u1_is_cu_noisy, |
| ps_me_optimised_function_list); |
| } |
| else |
| { |
| hme_fullpel_cand_sifter( |
| &s_search_prms_blk, |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| ALPHA_FOR_NOISE_TERM_IN_ME, |
| u1_is_cu_noisy, |
| ps_me_optimised_function_list); |
| } |
| } |
| else |
| { |
| hme_fullpel_cand_sifter( |
| &s_search_prms_blk, |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| ALPHA_FOR_NOISE_TERM_IN_ME_P, |
| u1_is_cu_noisy, |
| ps_me_optimised_function_list); |
| } |
| } |
| |
| s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; |
| |
| hme_fullpel_refine( |
| ps_refine_prms, |
| &s_search_prms_blk, |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| au4_unique_node_map, |
| num_unique_nodes, |
| blk_8x8_mask, |
| center_x, |
| center_y, |
| center_ref_idx, |
| e_me_quality_presets, |
| ps_me_optimised_function_list); |
| } |
| |
| /* Sub-Pel search */ |
| { |
| hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); |
| |
| s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem( |
| &ps_ctxt->s_buf_mgr, |
| INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE); |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| SCALE_RANGE_PRMS( |
| as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1); |
| SCALE_RANGE_PRMS( |
| as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2); |
| } |
| s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6; |
| s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6; |
| |
| hme_subpel_refine_cu_hs( |
| &s_subpel_prms, |
| ps_curr_layer, |
| ps_search_results, |
| u1_pred_dir, |
| &ps_ctxt->s_wt_pred, |
| blk_8x8_mask, |
| ps_ctxt->ps_func_selector, |
| ps_cmn_utils_optimised_function_list, |
| ps_me_optimised_function_list); |
| } |
| } |
| } |
| /* Populate the new PU struct with the results post subpel refinement*/ |
| { |
| inter_cu_results_t *ps_cu_results; |
| WORD32 best_inter_cost, intra_cost, posx, posy; |
| |
| UWORD8 intra_8x8_enabled = 0; |
| |
| /* cost of 16x16 cu parent */ |
| WORD32 parent_cost = MAX_32BIT_VAL; |
| |
| /* cost of 8x8 cu children */ |
| /*********************************************************************/ |
| /* Assuming parent is not split, then we signal 1 bit for this parent*/ |
| /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */ |
| /* So, 4*lambda is extra for children cost. */ |
| /*********************************************************************/ |
| WORD32 child_cost = 0; |
| |
| ps_cu_results = ps_search_results->ps_cu_results; |
| |
| /* Initialize the pu_results pointers to the first struct in the stack array */ |
| ps_pu_results = as_inter_pu_results; |
| |
| hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr); |
| |
| hme_populate_pus( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| &s_subpel_prms, |
| ps_search_results, |
| ps_cu_results, |
| ps_pu_results, |
| &(as_pu_results[0][0][0]), |
| &s_common_frm_prms, |
| &ps_ctxt->s_wt_pred, |
| ps_curr_layer, |
| au1_pred_dir_searched, |
| i4_num_pred_dir); |
| |
| ps_cu_results->i4_inp_offset = |
| (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64); |
| |
| hme_decide_part_types( |
| ps_cu_results, |
| ps_pu_results, |
| &s_common_frm_prms, |
| ps_ctxt, |
| ps_cmn_utils_optimised_function_list, |
| ps_me_optimised_function_list |
| |
| ); |
| |
| /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| WORD32 res_ctr; |
| |
| for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++) |
| { |
| WORD32 num_part = 2, part_ctr; |
| part_type_results_t *ps_best_results = |
| &ps_cu_results->ps_best_results[res_ctr]; |
| |
| if(PRT_2Nx2N == ps_best_results->u1_part_type) |
| num_part = 1; |
| |
| for(part_ctr = 0; part_ctr < num_part; part_ctr++) |
| { |
| pu_result_t *ps_pu_results = |
| &ps_best_results->as_pu_results[part_ctr]; |
| |
| ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode); |
| |
| hme_update_dynamic_search_params( |
| &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p] |
| .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx], |
| ps_pu_results->pu.mv.s_l0_mv.i2_mvy); |
| |
| /* Sanity Check */ |
| ASSERT( |
| ps_pu_results->pu.mv.i1_l0_ref_idx < |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0); |
| |
| /* No L1 for P Pic. */ |
| ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode); |
| /* No BI for P Pic. */ |
| ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode); |
| } |
| } |
| } |
| |
| /*****************************************************************/ |
| /* INSERT INTRA RESULTS AT 16x16 LEVEL. */ |
| /*****************************************************************/ |
| |
| #if DISABLE_INTRA_IN_BPICS |
| if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) && |
| (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))) |
| #endif |
| { |
| if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy)) |
| { |
| hme_insert_intra_nodes_post_bipred( |
| ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); |
| } |
| } |
| |
| #if DISABLE_INTRA_IN_BPICS |
| if((ME_XTREME_SPEED_25 == e_me_quality_presets) && |
| (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)) |
| { |
| intra_8x8_enabled = 0; |
| } |
| else |
| #endif |
| { |
| /*TRAQO intra flag updation*/ |
| if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag) |
| { |
| best_inter_cost = |
| ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost; |
| intra_cost = |
| ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost; |
| /*@16x16 level*/ |
| posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x |
| << 2) >> |
| 4; |
| posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y |
| << 2) >> |
| 4; |
| } |
| else |
| { |
| best_inter_cost = |
| ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost; |
| posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x |
| << 2) >> |
| 3; |
| posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y |
| << 2) >> |
| 3; |
| } |
| |
| /* Disable intra16/32/64 flags based on split flags recommended by IPE */ |
| if(ps_cur_ipe_ctb->u1_split_flag) |
| { |
| /* Id of the 32x32 block, 16x16 block in a CTB */ |
| WORD32 i4_32x32_id = |
| (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5); |
| WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 + |
| ((ps_cu_results->u1_x_off >> 4) & 0x1); |
| |
| if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag) |
| { |
| if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] |
| .as_intra16_analyse[i4_16x16_id] |
| .b1_split_flag) |
| { |
| intra_8x8_enabled = |
| ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] |
| .as_intra16_analyse[i4_16x16_id] |
| .as_intra8_analyse[0] |
| .b1_valid_cu; |
| intra_8x8_enabled &= |
| ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] |
| .as_intra16_analyse[i4_16x16_id] |
| .as_intra8_analyse[1] |
| .b1_valid_cu; |
| intra_8x8_enabled &= |
| ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] |
| .as_intra16_analyse[i4_16x16_id] |
| .as_intra8_analyse[2] |
| .b1_valid_cu; |
| intra_8x8_enabled &= |
| ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id] |
| .as_intra16_analyse[i4_16x16_id] |
| .as_intra8_analyse[3] |
| .b1_valid_cu; |
| } |
| } |
| } |
| } |
| |
| if(blk_8x8_mask == 0xf) |
| { |
| parent_cost = |
| ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost; |
| ps_search_results->u1_split_flag = 0; |
| } |
| else |
| { |
| ps_search_results->u1_split_flag = 1; |
| } |
| |
| ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2]; |
| |
| if(s_common_frm_prms.u1_is_cu_noisy) |
| { |
| intra_8x8_enabled = 0; |
| } |
| |
| /* Evalaute 8x8 if NxN part id is enabled */ |
| if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled) |
| { |
| /* Populates the PU's for the 4 8x8's in one call */ |
| hme_populate_pus_8x8_cu( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| &s_subpel_prms, |
| ps_search_results, |
| ps_cu_results, |
| ps_pu_results, |
| &(as_pu_results[0][0][0]), |
| &s_common_frm_prms, |
| au1_pred_dir_searched, |
| i4_num_pred_dir, |
| blk_8x8_mask); |
| |
| /* Re-initialize the pu_results pointers to the first struct in the stack array */ |
| ps_pu_results = as_inter_pu_results; |
| |
| for(i = 0; i < 4; i++) |
| { |
| if((blk_8x8_mask & (1 << i))) |
| { |
| if(ps_cu_results->i4_part_mask) |
| { |
| hme_decide_part_types( |
| ps_cu_results, |
| ps_pu_results, |
| &s_common_frm_prms, |
| ps_ctxt, |
| ps_cmn_utils_optimised_function_list, |
| ps_me_optimised_function_list |
| |
| ); |
| } |
| /*****************************************************************/ |
| /* INSERT INTRA RESULTS AT 8x8 LEVEL. */ |
| /*****************************************************************/ |
| #if DISABLE_INTRA_IN_BPICS |
| if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) && |
| (ps_ctxt->s_frm_prms.i4_temporal_layer_id > |
| TEMPORAL_LAYER_DISABLE))) |
| #endif |
| { |
| if(!(DISABLE_INTRA_WHEN_NOISY && |
| s_common_frm_prms.u1_is_cu_noisy)) |
| { |
| hme_insert_intra_nodes_post_bipred( |
| ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep); |
| } |
| } |
| |
| child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost; |
| } |
| |
| ps_cu_results++; |
| ps_pu_results++; |
| } |
| |
| /* Compare 16x16 vs 8x8 cost */ |
| if(child_cost < parent_cost) |
| { |
| ps_search_results->best_cu_cost = child_cost; |
| ps_search_results->u1_split_flag = 1; |
| } |
| } |
| } |
| |
| hme_update_mv_bank_encode( |
| ps_search_results, |
| ps_curr_layer->ps_layer_mvbank, |
| blk_x, |
| blk_y, |
| &s_mv_update_prms, |
| au1_pred_dir_searched, |
| i4_num_act_ref_l0); |
| |
| /*********************************************************************/ |
| /* Map the best results to an MV Grid. This is a 18x18 grid that is */ |
| /* useful for doing things like predictor for cost calculation or */ |
| /* also for merge calculations if need be. */ |
| /*********************************************************************/ |
| hme_map_mvs_to_grid( |
| &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir); |
| } |
| |
| /* Set the CU tree nodes appropriately */ |
| if(e_me_quality_presets != ME_PRISTINE_QUALITY) |
| { |
| WORD32 i, j; |
| |
| for(i = 0; i < 16; i++) |
| { |
| cur_ctb_cu_tree_t *ps_tree_node = |
| ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE); |
| search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i]; |
| |
| switch(i >> 2) |
| { |
| case 0: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_tl; |
| |
| break; |
| } |
| case 1: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_tr; |
| |
| break; |
| } |
| case 2: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_bl; |
| |
| break; |
| } |
| case 3: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_br; |
| |
| break; |
| } |
| } |
| |
| switch(i % 4) |
| { |
| case 0: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_tl; |
| |
| break; |
| } |
| case 1: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_tr; |
| |
| break; |
| } |
| case 2: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_bl; |
| |
| break; |
| } |
| case 3: |
| { |
| ps_tree_node = ps_tree_node->ps_child_node_br; |
| |
| break; |
| } |
| } |
| |
| if(ai4_blk_8x8_mask[i] == 15) |
| { |
| if(!ps_results->u1_split_flag) |
| { |
| ps_tree_node->is_node_valid = 1; |
| NULLIFY_THE_CHILDREN_NODES(ps_tree_node); |
| } |
| else |
| { |
| ps_tree_node->is_node_valid = 0; |
| ENABLE_THE_CHILDREN_NODES(ps_tree_node); |
| } |
| } |
| else |
| { |
| cur_ctb_cu_tree_t *ps_tree_child; |
| |
| ps_tree_node->is_node_valid = 0; |
| |
| for(j = 0; j < 4; j++) |
| { |
| switch(j) |
| { |
| case 0: |
| { |
| ps_tree_child = ps_tree_node->ps_child_node_tl; |
| |
| break; |
| } |
| case 1: |
| { |
| ps_tree_child = ps_tree_node->ps_child_node_tr; |
| |
| break; |
| } |
| case 2: |
| { |
| ps_tree_child = ps_tree_node->ps_child_node_bl; |
| |
| break; |
| } |
| case 3: |
| { |
| ps_tree_child = ps_tree_node->ps_child_node_br; |
| |
| break; |
| } |
| } |
| |
| ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j)); |
| } |
| } |
| } |
| } |
| |
| if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root; |
| |
| hme_analyse_mv_clustering( |
| ps_ctxt->as_search_results_16x16, |
| ps_ctxt->as_cu16x16_results, |
| ps_ctxt->as_cu8x8_results, |
| ps_ctxt->ps_ctb_cluster_info, |
| ps_ctxt->ai1_future_list, |
| ps_ctxt->ai1_past_list, |
| ps_ctxt->s_frm_prms.bidir_enabled, |
| e_me_quality_presets); |
| |
| #if DISABLE_BLK_MERGE_WHEN_NOISY |
| ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0]; |
| ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1]; |
| ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2]; |
| ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3]; |
| ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0]; |
| ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1]; |
| ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2]; |
| ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3]; |
| ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0]; |
| ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0]; |
| #endif |
| |
| en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) | |
| (ps_tree->ps_child_node_tr->is_node_valid << 1) | |
| (ps_tree->ps_child_node_bl->is_node_valid << 2) | |
| (ps_tree->ps_child_node_br->is_node_valid << 3); |
| |
| en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) | |
| (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) | |
| (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) | |
| (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) | |
| (ps_tree->u1_inter_eval_enable << 4); |
| } |
| else |
| { |
| en_merge_execution = 0x1f; |
| |
| #if DISABLE_BLK_MERGE_WHEN_NOISY |
| en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) | |
| ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) | |
| ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) | |
| ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8)); |
| #endif |
| } |
| |
| /* Re-initialize the pu_results pointers to the first struct in the stack array */ |
| ps_pu_results = as_inter_pu_results; |
| |
| { |
| WORD32 ref_ctr; |
| |
| s_ctb_prms.i4_ctb_x = i4_ctb_x << 6; |
| s_ctb_prms.i4_ctb_y = i4_ctb_y << 6; |
| |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1); |
| SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2); |
| } |
| |
| e_merge_result = CU_SPLIT; |
| merge_count_32x32 = 0; |
| |
| if((en_merge_32x32 & 1) && (en_merge_execution & 1)) |
| { |
| range_prms_t *ps_pic_limit; |
| if(s_merge_prms_32x32_tl.i4_use_rec == 1) |
| { |
| ps_pic_limit = &s_pic_limit_rec; |
| } |
| else |
| { |
| ps_pic_limit = &s_pic_limit_inp; |
| } |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| hme_derive_search_range( |
| s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], |
| ps_pic_limit, |
| &as_mv_limit[ref_ctr], |
| i4_ctb_x << 6, |
| i4_ctb_y << 6, |
| 32, |
| 32); |
| |
| SCALE_RANGE_PRMS_POINTERS( |
| s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], |
| s_merge_prms_32x32_tl.aps_mv_range[ref_ctr], |
| 2); |
| } |
| s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6; |
| s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6; |
| s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0]; |
| |
| e_merge_result = hme_try_merge_high_speed( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| ps_cur_ipe_ctb, |
| &s_subpel_prms, |
| &s_merge_prms_32x32_tl, |
| ps_pu_results, |
| &as_pu_results[0][0][0]); |
| |
| if(e_merge_result == CU_MERGED) |
| { |
| inter_cu_results_t *ps_cu_results = |
| s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results; |
| |
| if(!((ps_cu_results->u1_num_best_results == 1) && |
| (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) |
| { |
| hme_map_mvs_to_grid( |
| &aps_mv_grid[0], |
| s_merge_prms_32x32_tl.ps_results_merge, |
| s_merge_prms_32x32_tl.au1_pred_dir_searched, |
| s_merge_prms_32x32_tl.i4_num_pred_dir_actual); |
| } |
| |
| if(ME_PRISTINE_QUALITY != e_me_quality_presets) |
| { |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_tl->is_node_valid = 1; |
| NULLIFY_THE_CHILDREN_NODES( |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_tl); |
| } |
| |
| merge_count_32x32++; |
| e_merge_result = CU_SPLIT; |
| } |
| else if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; |
| |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| } |
| } |
| else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1))) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl; |
| |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| |
| if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY) |
| { |
| ps_tree->is_node_valid = 0; |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| } |
| } |
| |
| if((en_merge_32x32 & 2) && (en_merge_execution & 2)) |
| { |
| range_prms_t *ps_pic_limit; |
| if(s_merge_prms_32x32_tr.i4_use_rec == 1) |
| { |
| ps_pic_limit = &s_pic_limit_rec; |
| } |
| else |
| { |
| ps_pic_limit = &s_pic_limit_inp; |
| } |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| hme_derive_search_range( |
| s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], |
| ps_pic_limit, |
| &as_mv_limit[ref_ctr], |
| (i4_ctb_x << 6) + 32, |
| i4_ctb_y << 6, |
| 32, |
| 32); |
| SCALE_RANGE_PRMS_POINTERS( |
| s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], |
| s_merge_prms_32x32_tr.aps_mv_range[ref_ctr], |
| 2); |
| } |
| s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6; |
| s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6; |
| s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1]; |
| |
| e_merge_result = hme_try_merge_high_speed( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| ps_cur_ipe_ctb, |
| &s_subpel_prms, |
| &s_merge_prms_32x32_tr, |
| ps_pu_results, |
| &as_pu_results[0][0][0]); |
| |
| if(e_merge_result == CU_MERGED) |
| { |
| inter_cu_results_t *ps_cu_results = |
| s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results; |
| |
| if(!((ps_cu_results->u1_num_best_results == 1) && |
| (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) |
| { |
| hme_map_mvs_to_grid( |
| &aps_mv_grid[0], |
| s_merge_prms_32x32_tr.ps_results_merge, |
| s_merge_prms_32x32_tr.au1_pred_dir_searched, |
| s_merge_prms_32x32_tr.i4_num_pred_dir_actual); |
| } |
| |
| if(ME_PRISTINE_QUALITY != e_me_quality_presets) |
| { |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_tr->is_node_valid = 1; |
| NULLIFY_THE_CHILDREN_NODES( |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_tr); |
| } |
| |
| merge_count_32x32++; |
| e_merge_result = CU_SPLIT; |
| } |
| else if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; |
| |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| } |
| } |
| else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2))) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr; |
| |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| |
| if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY) |
| { |
| ps_tree->is_node_valid = 0; |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| } |
| } |
| |
| if((en_merge_32x32 & 4) && (en_merge_execution & 4)) |
| { |
| range_prms_t *ps_pic_limit; |
| if(s_merge_prms_32x32_bl.i4_use_rec == 1) |
| { |
| ps_pic_limit = &s_pic_limit_rec; |
| } |
| else |
| { |
| ps_pic_limit = &s_pic_limit_inp; |
| } |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| hme_derive_search_range( |
| s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], |
| ps_pic_limit, |
| &as_mv_limit[ref_ctr], |
| i4_ctb_x << 6, |
| (i4_ctb_y << 6) + 32, |
| 32, |
| 32); |
| SCALE_RANGE_PRMS_POINTERS( |
| s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], |
| s_merge_prms_32x32_bl.aps_mv_range[ref_ctr], |
| 2); |
| } |
| s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6; |
| s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6; |
| s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2]; |
| |
| e_merge_result = hme_try_merge_high_speed( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| ps_cur_ipe_ctb, |
| &s_subpel_prms, |
| &s_merge_prms_32x32_bl, |
| ps_pu_results, |
| &as_pu_results[0][0][0]); |
| |
| if(e_merge_result == CU_MERGED) |
| { |
| inter_cu_results_t *ps_cu_results = |
| s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results; |
| |
| if(!((ps_cu_results->u1_num_best_results == 1) && |
| (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) |
| { |
| hme_map_mvs_to_grid( |
| &aps_mv_grid[0], |
| s_merge_prms_32x32_bl.ps_results_merge, |
| s_merge_prms_32x32_bl.au1_pred_dir_searched, |
| s_merge_prms_32x32_bl.i4_num_pred_dir_actual); |
| } |
| |
| if(ME_PRISTINE_QUALITY != e_me_quality_presets) |
| { |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_bl->is_node_valid = 1; |
| NULLIFY_THE_CHILDREN_NODES( |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_bl); |
| } |
| |
| merge_count_32x32++; |
| e_merge_result = CU_SPLIT; |
| } |
| else if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; |
| |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| } |
| } |
| else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4))) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl; |
| |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| |
| if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY) |
| { |
| ps_tree->is_node_valid = 0; |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| } |
| } |
| |
| if((en_merge_32x32 & 8) && (en_merge_execution & 8)) |
| { |
| range_prms_t *ps_pic_limit; |
| if(s_merge_prms_32x32_br.i4_use_rec == 1) |
| { |
| ps_pic_limit = &s_pic_limit_rec; |
| } |
| else |
| { |
| ps_pic_limit = &s_pic_limit_inp; |
| } |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| hme_derive_search_range( |
| s_merge_prms_32x32_br.aps_mv_range[ref_ctr], |
| ps_pic_limit, |
| &as_mv_limit[ref_ctr], |
| (i4_ctb_x << 6) + 32, |
| (i4_ctb_y << 6) + 32, |
| 32, |
| 32); |
| |
| SCALE_RANGE_PRMS_POINTERS( |
| s_merge_prms_32x32_br.aps_mv_range[ref_ctr], |
| s_merge_prms_32x32_br.aps_mv_range[ref_ctr], |
| 2); |
| } |
| s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6; |
| s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6; |
| s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3]; |
| |
| e_merge_result = hme_try_merge_high_speed( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| ps_cur_ipe_ctb, |
| &s_subpel_prms, |
| &s_merge_prms_32x32_br, |
| ps_pu_results, |
| &as_pu_results[0][0][0]); |
| |
| if(e_merge_result == CU_MERGED) |
| { |
| /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results; |
| |
| if(!((ps_cu_results->u1_num_best_results == 1) && |
| (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag))) |
| { |
| hme_map_mvs_to_grid |
| ( |
| &aps_mv_grid[0], |
| s_merge_prms_32x32_br.ps_results_merge, |
| s_merge_prms_32x32_br.au1_pred_dir_searched, |
| s_merge_prms_32x32_br.i4_num_pred_dir_actual |
| ); |
| }*/ |
| |
| if(ME_PRISTINE_QUALITY != e_me_quality_presets) |
| { |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_br->is_node_valid = 1; |
| NULLIFY_THE_CHILDREN_NODES( |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .ps_child_node_br); |
| } |
| |
| merge_count_32x32++; |
| e_merge_result = CU_SPLIT; |
| } |
| else if(ME_PRISTINE_QUALITY == e_me_quality_presets) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; |
| |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| } |
| } |
| else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8))) |
| { |
| #if ENABLE_CU_TREE_CULLING |
| cur_ctb_cu_tree_t *ps_tree = |
| ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br; |
| |
| ENABLE_THE_CHILDREN_NODES(ps_tree); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl); |
| ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br); |
| #endif |
| |
| if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY) |
| { |
| ps_tree->is_node_valid = 0; |
| ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0; |
| en_merge_execution = (en_merge_execution & (~(1 << 4))); |
| } |
| } |
| |
| /* Try merging all 32x32 to 64x64 candts */ |
| if(((en_merge_32x32 & 0xf) == 0xf) && |
| (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) || |
| ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY)))) |
| if((((e_me_quality_presets == ME_XTREME_SPEED_25) && |
| !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) || |
| (e_me_quality_presets != ME_XTREME_SPEED_25))) |
| { |
| range_prms_t *ps_pic_limit; |
| if(s_merge_prms_64x64.i4_use_rec == 1) |
| { |
| ps_pic_limit = &s_pic_limit_rec; |
| } |
| else |
| { |
| ps_pic_limit = &s_pic_limit_inp; |
| } |
| /* MV limit is different based on ref. PIC */ |
| for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++) |
| { |
| hme_derive_search_range( |
| s_merge_prms_64x64.aps_mv_range[ref_ctr], |
| ps_pic_limit, |
| &as_mv_limit[ref_ctr], |
| i4_ctb_x << 6, |
| i4_ctb_y << 6, |
| 64, |
| 64); |
| |
| SCALE_RANGE_PRMS_POINTERS( |
| s_merge_prms_64x64.aps_mv_range[ref_ctr], |
| s_merge_prms_64x64.aps_mv_range[ref_ctr], |
| 2); |
| } |
| s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6; |
| s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6; |
| s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0]; |
| |
| e_merge_result = hme_try_merge_high_speed( |
| ps_thrd_ctxt, |
| ps_ctxt, |
| ps_cur_ipe_ctb, |
| &s_subpel_prms, |
| &s_merge_prms_64x64, |
| ps_pu_results, |
| &as_pu_results[0][0][0]); |
| |
| if((e_merge_result == CU_MERGED) && |
| (ME_PRISTINE_QUALITY != e_me_quality_presets)) |
| { |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .is_node_valid = 1; |
| NULLIFY_THE_CHILDREN_NODES( |
| ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)); |
| } |
| else if( |
| (e_merge_result == CU_SPLIT) && |
| (ME_PRISTINE_QUALITY == e_me_quality_presets)) |
| { |
| ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)] |
| .is_node_valid = 0; |
| } |
| } |
| |
| /*****************************************************************/ |
                    /*             UPDATE OF RESULTS TO EXTERNAL STRUCTURES           */
| /*****************************************************************/ |
| pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y); |
| |
| { |
| #ifdef _DEBUG |
| S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64) |
| ? 64 |
| : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off; |
| S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64) |
| ? 64 |
| : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off; |
| ASSERT( |
| (wd * ht) == |
| ihevce_compute_area_of_valid_cus_in_ctb( |
| &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)])); |
| #endif |
| } |
| } |
| |
| /* set the dependency for the corresponding row in enc loop */ |
| ihevce_dmgr_set_row_row_sync( |
| pv_dep_mngr_encloop_dep_me, |
| (i4_ctb_x + 1), |
| i4_ctb_y, |
| tile_col_idx /* Col Tile No. */); |
| |
| left_ctb_in_diff_tile = 0; |
| } |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, |
| * refine_layer_prms_t *ps_refine_prms) |
| * |
| * @brief Top level entry point for refinement ME |
| * |
| * @param[in,out] ps_ctxt: ME Handle |
| * |
| * @param[in] ps_refine_prms : refinement layer prms |
| * |
| * @return None |
| ******************************************************************************** |
| */ |
| void hme_refine_no_encode( |
| coarse_me_ctxt_t *ps_ctxt, |
| refine_prms_t *ps_refine_prms, |
| multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
| S32 lyr_job_type, |
| WORD32 i4_ping_pong, |
| void **ppv_dep_mngr_hme_sync) |
| { |
| BLK_SIZE_T e_search_blk_size, e_result_blk_size; |
| ME_QUALITY_PRESETS_T e_me_quality_presets = |
| ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; |
| |
| /*************************************************************************/ |
| /* Complexity of search: Low to High */ |
| /*************************************************************************/ |
| SEARCH_COMPLEXITY_T e_search_complexity; |
| |
| /*************************************************************************/ |
    /* Config parameter structures for various ME submodules                 */
| /*************************************************************************/ |
| hme_search_prms_t s_search_prms_blk; |
| mvbank_update_prms_t s_mv_update_prms; |
| |
| /*************************************************************************/ |
| /* All types of search candidates for predictor based search. */ |
| /*************************************************************************/ |
| S32 num_init_candts = 0; |
| search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS]; |
| search_node_t as_top_neighbours[4], as_left_neighbours[3]; |
| search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr; |
| search_node_t *ps_candt_l, *ps_candt_t; |
| search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2]; |
| search_node_t *ps_candt_prj_bl[2]; |
| search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2]; |
| search_node_t *ps_candt_prj_coloc[2]; |
| |
| pf_get_wt_inp fp_get_wt_inp; |
| |
| search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9]; |
| U32 au4_unique_node_map[MAP_X_MAX * 2]; |
| |
| /*EIID */ |
| WORD32 i4_num_inter_wins = 0; //debug code to find stat of |
| WORD32 i4_num_comparisions = 0; //debug code |
| WORD32 i4_threshold_multiplier; |
| WORD32 i4_threshold_divider; |
| WORD32 i4_temporal_layer = |
| ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id; |
| |
| /*************************************************************************/ |
    /* points to the search results for the blk level search (8x8/16x16)    */
| /*************************************************************************/ |
| search_results_t *ps_search_results; |
| |
| /*************************************************************************/ |
| /* Coordinates */ |
| /*************************************************************************/ |
| S32 blk_x, i4_ctb_x, blk_id_in_ctb; |
| //S32 i4_ctb_y; |
| S32 pos_x, pos_y; |
| S32 blk_id_in_full_ctb; |
| S32 i4_num_srch_cands; |
| |
| S32 blk_y; |
| |
| /*************************************************************************/ |
| /* Related to dimensions of block being searched and pic dimensions */ |
| /*************************************************************************/ |
| S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic; |
| S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb; |
| S32 num_results_prev_layer; |
| |
| /*************************************************************************/ |
| /* Size of a basic unit for this layer. For non encode layers, we search */ |
| /* in block sizes of 8x8. For encode layers, though we search 16x16s the */ |
| /* basic unit size is the ctb size. */ |
| /*************************************************************************/ |
| S32 unit_size; |
| |
| /*************************************************************************/ |
| /* Pointers to context in current and coarser layers */ |
| /*************************************************************************/ |
| layer_ctxt_t *ps_curr_layer, *ps_coarse_layer; |
| |
| /*************************************************************************/ |
| /* to store mv range per blk, and picture limit, allowed search range */ |
| /* range prms in hpel and qpel units as well */ |
| /*************************************************************************/ |
| range_prms_t s_range_prms_inp, s_range_prms_rec; |
| range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF]; |
| /*************************************************************************/ |
| /* These variables are used to track number of references at different */ |
| /* stages of ME. */ |
| /*************************************************************************/ |
| S32 i4_num_ref_fpel, i4_num_ref_before_merge; |
| S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer; |
| S32 lambda_inp = ps_refine_prms->lambda_inp; |
| |
| /*************************************************************************/ |
| /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */ |
| /* Explicit means it searches on all active ref idx. */ |
| /*************************************************************************/ |
| S32 curr_layer_implicit, prev_layer_implicit; |
| |
| /*************************************************************************/ |
| /* Variables for loop counts */ |
| /*************************************************************************/ |
| S32 id; |
| S08 i1_ref_idx; |
| |
| /*************************************************************************/ |
| /* Input pointer and stride */ |
| /*************************************************************************/ |
| U08 *pu1_inp; |
| S32 i4_inp_stride; |
| |
| S32 end_of_frame; |
| |
| S32 num_sync_units_in_row; |
| |
| PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt; |
| ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1); |
| |
| /*************************************************************************/ |
| /* Pointers to current and coarse layer are needed for projection */ |
| /* Pointer to prev layer are needed for other candts like coloc */ |
| /*************************************************************************/ |
| ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id]; |
| |
| ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1]; |
| |
| num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref; |
| |
    /* Function pointer is selected based on the C vs X86 macro */
| |
| fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list) |
| ->pf_get_wt_inp_8x8; |
| |
| i4_inp_stride = ps_curr_layer->i4_inp_stride; |
| i4_pic_wd = ps_curr_layer->i4_wd; |
| i4_pic_ht = ps_curr_layer->i4_ht; |
| e_search_complexity = ps_refine_prms->e_search_complexity; |
| |
| end_of_frame = 0; |
| |
| /* If the previous layer is non-encode layer, then use dyadic projection */ |
| if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1]) |
| pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic; |
| else |
| pf_hme_project_coloc_candt = hme_project_coloc_candt; |
| |
| /* This points to all the initial candts */ |
| ps_search_candts = &as_search_candts[0]; |
| |
| { |
| e_search_blk_size = BLK_8x8; |
| blk_wd = blk_ht = 8; |
| blk_size_shift = 3; |
| s_mv_update_prms.i4_shift = 0; |
| /*********************************************************************/ |
| /* In case we do not encode this layer, we search 8x8 with or without*/ |
| /* enable 4x4 SAD. */ |
| /*********************************************************************/ |
| { |
| S32 i4_mask = (ENABLE_2Nx2N); |
| |
| e_result_blk_size = BLK_8x8; |
| if(ps_refine_prms->i4_enable_4x4_part) |
| { |
| i4_mask |= (ENABLE_NxN); |
| e_result_blk_size = BLK_4x4; |
| s_mv_update_prms.i4_shift = 1; |
| } |
| |
| s_search_prms_blk.i4_part_mask = i4_mask; |
| } |
| |
| unit_size = blk_wd; |
| s_search_prms_blk.i4_inp_stride = unit_size; |
| } |
| |
| /* This is required to properly update the layer mv bank */ |
| s_mv_update_prms.e_search_blk_size = e_search_blk_size; |
| s_search_prms_blk.e_blk_size = e_search_blk_size; |
| |
| /*************************************************************************/ |
| /* If current layer is explicit, then the number of ref frames are to */ |
| /* be same as previous layer. Else it will be 2 */ |
| /*************************************************************************/ |
| i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref; |
| if(ps_refine_prms->explicit_ref) |
| { |
| curr_layer_implicit = 0; |
| i4_num_ref_fpel = i4_num_ref_prev_layer; |
| /* 100578 : Using same mv cost fun. for all presets. */ |
| s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine; |
| } |
| else |
| { |
| i4_num_ref_fpel = 2; |
| curr_layer_implicit = 1; |
| { |
| if(ME_MEDIUM_SPEED > e_me_quality_presets) |
| { |
| s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit; |
| } |
| else |
| { |
| #if USE_MODIFIED == 1 |
| s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified; |
| #else |
| s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed; |
| #endif |
| } |
| } |
| } |
| |
| i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer); |
| if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == |
| IV_IDR_FRAME || |
| ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME) |
| { |
| i4_num_ref_fpel = 1; |
| } |
| if(i4_num_ref_prev_layer <= 2) |
| { |
| prev_layer_implicit = 1; |
| curr_layer_implicit = 1; |
| i4_num_ref_each_dir = 1; |
| } |
| else |
| { |
| /* It is assumed that we have equal number of references in each dir */ |
| //ASSERT(!(i4_num_ref_prev_layer & 1)); |
| prev_layer_implicit = 0; |
| i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1; |
| } |
| s_mv_update_prms.i4_num_ref = i4_num_ref_fpel; |
| s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0; |
| s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
| |
| /* this can be kept to 1 or 2 */ |
| i4_num_ref_before_merge = 2; |
| i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel); |
| |
| /* Set up place holders to hold the search nodes of each initial candt */ |
| for(i = 0; i < MAX_INIT_CANDTS; i++) |
| { |
| ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; |
| INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0); |
| } |
| |
| /* redundant, but doing it here since it is used in pred ctxt init */ |
| ps_candt_zeromv = ps_search_candts[0].ps_search_node; |
| for(i = 0; i < 3; i++) |
| { |
| search_node_t *ps_search_node; |
| ps_search_node = &as_left_neighbours[i]; |
| INIT_SEARCH_NODE(ps_search_node, 0); |
| ps_search_node = &as_top_neighbours[i]; |
| INIT_SEARCH_NODE(ps_search_node, 0); |
| } |
| |
| INIT_SEARCH_NODE(&as_top_neighbours[3], 0); |
| /* bottom left node always not available for the blk being searched */ |
| as_left_neighbours[2].u1_is_avail = 0; |
| /*************************************************************************/ |
| /* Initialize all the search results structure here. We update all the */ |
| /* search results to default values, and configure things like blk sizes */ |
| /*************************************************************************/ |
| if(ps_refine_prms->i4_encode == 0) |
| { |
| S32 pred_lx; |
| search_results_t *ps_search_results; |
| |
| ps_search_results = &ps_ctxt->s_search_results_8x8; |
| hme_init_search_results( |
| ps_search_results, |
| i4_num_ref_fpel, |
| ps_refine_prms->i4_num_fpel_results, |
| ps_refine_prms->i4_num_results_per_part, |
| e_search_blk_size, |
| 0, |
| 0, |
| &ps_ctxt->au1_is_past[0]); |
| for(pred_lx = 0; pred_lx < 2; pred_lx++) |
| { |
| hme_init_pred_ctxt_no_encode( |
| &ps_search_results->as_pred_ctxt[pred_lx], |
| ps_search_results, |
| &as_top_neighbours[0], |
| &as_left_neighbours[0], |
| &ps_candt_prj_coloc[0], |
| ps_candt_zeromv, |
| ps_candt_zeromv, |
| pred_lx, |
| lambda_inp, |
| ps_refine_prms->lambda_q_shift, |
| &ps_ctxt->apu1_ref_bits_tlu_lc[0], |
| &ps_ctxt->ai2_ref_scf[0]); |
| } |
| } |
| |
| /*********************************************************************/ |
| /* Initialize the dyn. search range params. for each reference index */ |
| /* in current layer ctxt */ |
| /*********************************************************************/ |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| WORD32 ref_ctr; |
| |
| for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++) |
| { |
| INIT_DYN_SEARCH_PRMS( |
| &ps_ctxt->s_coarse_dyn_range_prms |
| .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr], |
| ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]); |
| } |
| } |
| |
| /* Next set up initial candidates according to a given set of rules. */ |
| /* The number of initial candidates affects the quality of ME in the */ |
| /* case of motion with multiple degrees of freedom. In case of simple */ |
| /* translational motion, a current and a few causal and non causal */ |
| /* candts would suffice. More candidates help to cover more complex */ |
| /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */ |
| /* where multiple ref helps etc. */ |
| /* The candidate choice also depends on the following parameters. */ |
| /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */ |
| /* Whether we encode or not, and the type of search across reference */ |
| /* i.e. the previous layer may have been explicit/implicit and curr */ |
| /* layer may be explicit/implicit */ |
| |
    /* 0, 0, L, T, projected coloc best always present by default */
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets); |
| ps_candt_zeromv = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 0; |
| ps_candt_zeromv->s_mv.i2_mvx = 0; |
| ps_candt_zeromv->s_mv.i2_mvy = 0; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets); |
| ps_candt_l = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 0; |
| |
| /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */ |
| /* not at the CTB boundary use the causal T and */ |
| /* not the projected T, although the candidate is */ |
| /* still pointed to by ps_candt_prj_t[0] */ |
| if(ME_MEDIUM_SPEED <= e_me_quality_presets) |
| { |
| /* Using Projected top to eliminate sync */ |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_TOP0, e_me_quality_presets); |
| ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| } |
| else |
| { |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| SPATIAL_TOP0, e_me_quality_presets); |
| ps_candt_t = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 0; |
| } |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_COLOC0, e_me_quality_presets); |
| ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_COLOC1, e_me_quality_presets); |
| ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| if(ME_MEDIUM_SPEED <= e_me_quality_presets) |
| { |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_TOP_RIGHT0, e_me_quality_presets); |
| ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_TOP_LEFT0, e_me_quality_presets); |
| ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| } |
| else |
| { |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| SPATIAL_TOP_RIGHT0, e_me_quality_presets); |
| ps_candt_tr = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 0; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| SPATIAL_TOP_LEFT0, e_me_quality_presets); |
| ps_candt_tl = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 0; |
| } |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_RIGHT0, e_me_quality_presets); |
| ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_BOTTOM0, e_me_quality_presets); |
| ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets); |
| ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_BOTTOM_LEFT0, e_me_quality_presets); |
| ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_RIGHT1, e_me_quality_presets); |
| ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_BOTTOM1, e_me_quality_presets); |
| ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets); |
| ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_BOTTOM_LEFT1, e_me_quality_presets); |
| ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets); |
| ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_TOP_RIGHT1, e_me_quality_presets); |
| ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| id = hme_decide_search_candidate_priority_in_l1_and_l2_me( |
| PROJECTED_TOP_LEFT1, e_me_quality_presets); |
| ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node; |
| ps_search_candts[id].u1_num_steps_refine = 1; |
| |
| /*************************************************************************/ |
| /* Now that the candidates have been ordered, to choose the right number */ |
| /* of initial candidates. */ |
| /*************************************************************************/ |
| if(curr_layer_implicit && !prev_layer_implicit) |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 7; |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 13; |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 18; |
| else |
| ASSERT(0); |
| } |
| else |
| { |
| if(e_search_complexity == SEARCH_CX_LOW) |
| num_init_candts = 5; |
| else if(e_search_complexity == SEARCH_CX_MED) |
| num_init_candts = 11; |
| else if(e_search_complexity == SEARCH_CX_HIGH) |
| num_init_candts = 16; |
| else |
| ASSERT(0); |
| } |
| |
| if(ME_XTREME_SPEED_25 == e_me_quality_presets) |
| { |
| num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25; |
| } |
| |
| /*************************************************************************/ |
| /* The following search parameters are fixed throughout the search across*/ |
| /* all blks. So these are configured outside processing loop */ |
| /*************************************************************************/ |
| s_search_prms_blk.i4_num_init_candts = num_init_candts; |
| s_search_prms_blk.i4_start_step = 1; |
| s_search_prms_blk.i4_use_satd = 0; |
| s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel; |
| /* we use recon only for encoded layers, otherwise it is not available */ |
| s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel; |
| |
| s_search_prms_blk.ps_search_candts = ps_search_candts; |
| /* We use the same mv_range for all ref. pic. So assign to member 0 */ |
| if(s_search_prms_blk.i4_use_rec) |
| s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec; |
| else |
| s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp; |
| /*************************************************************************/ |
| /* Initialize coordinates. Meaning as follows */ |
| /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */ |
| /* blk_y : same as above, y coord. */ |
| /* num_blks_in_this_ctb : number of blks in this given ctb that starts */ |
| /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */ |
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
| /* corner of the picture. Always multiple of 64. */ |
| /* blk_id_in_ctb : encode order id of the blk in the ctb. */ |
| /*************************************************************************/ |
| blk_y = 0; |
| blk_id_in_ctb = 0; |
| |
| GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); |
| |
    /* Get the number of sync units in a row based on encode/non encode layer */
| num_sync_units_in_row = num_blks_in_row; |
| |
| /*************************************************************************/ |
| /* Picture limit on all 4 sides. This will be used to set mv limits for */ |
    /* every block given its coordinate. Note this assumes that the min amt  */
| /* of padding to right of pic is equal to the blk size. If we go all the */ |
| /* way upto 64x64, then the min padding on right size of picture should */ |
| /* be 64, and also on bottom side of picture. */ |
| /*************************************************************************/ |
| SET_PIC_LIMIT( |
| s_pic_limit_inp, |
| ps_curr_layer->i4_pad_x_inp, |
| ps_curr_layer->i4_pad_y_inp, |
| ps_curr_layer->i4_wd, |
| ps_curr_layer->i4_ht, |
| s_search_prms_blk.i4_num_steps_post_refine); |
| |
| SET_PIC_LIMIT( |
| s_pic_limit_rec, |
| ps_curr_layer->i4_pad_x_rec, |
| ps_curr_layer->i4_pad_y_rec, |
| ps_curr_layer->i4_wd, |
| ps_curr_layer->i4_ht, |
| s_search_prms_blk.i4_num_steps_post_refine); |
| |
| /*************************************************************************/ |
| /* set the MV limit per ref. pic. */ |
| /* - P pic. : Based on the config params. */ |
| /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */ |
| /*************************************************************************/ |
| { |
| WORD32 ref_ctr; |
| /* Only for B/b pic. */ |
| if(1 == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| WORD16 i2_mv_y_per_poc, i2_max_mv_y; |
| WORD32 cur_poc, ref_poc, abs_poc_diff; |
| |
| cur_poc = ps_ctxt->i4_curr_poc; |
| |
| /* Get abs MAX for symmetric search */ |
| i2_mv_y_per_poc = MAX( |
| ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id], |
| (ABS(ps_ctxt->s_coarse_dyn_range_prms |
| .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id]))); |
| |
| for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++) |
| { |
| ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]; |
| abs_poc_diff = ABS((cur_poc - ref_poc)); |
| /* Get the cur. max MV based on POC distance */ |
| i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff; |
| i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y); |
| |
| as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; |
| as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y; |
| as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; |
| as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y; |
| } |
| } |
| else |
| { |
| /* Set the Config. File Params for P pic. */ |
| for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++) |
| { |
| as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; |
| as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y; |
| as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; |
| as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y; |
| } |
| } |
| } |
| |
| /* EIID: Calculate threshold based on quality preset and/or temporal layers */ |
| if(e_me_quality_presets == ME_MEDIUM_SPEED) |
| { |
| i4_threshold_multiplier = 1; |
| i4_threshold_divider = 4; |
| } |
| else if(e_me_quality_presets == ME_HIGH_SPEED) |
| { |
| i4_threshold_multiplier = 1; |
| i4_threshold_divider = 2; |
| } |
| else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25)) |
| { |
| #if OLD_XTREME_SPEED |
| /* Hard coding the temporal ID value to 1, if it is older xtreme speed */ |
| i4_temporal_layer = 1; |
| #endif |
| if(i4_temporal_layer == 0) |
| { |
| i4_threshold_multiplier = 3; |
| i4_threshold_divider = 4; |
| } |
| else if(i4_temporal_layer == 1) |
| { |
| i4_threshold_multiplier = 3; |
| i4_threshold_divider = 4; |
| } |
| else if(i4_temporal_layer == 2) |
| { |
| i4_threshold_multiplier = 1; |
| i4_threshold_divider = 1; |
| } |
| else |
| { |
| i4_threshold_multiplier = 5; |
| i4_threshold_divider = 4; |
| } |
| } |
| else if(e_me_quality_presets == ME_HIGH_QUALITY) |
| { |
| i4_threshold_multiplier = 1; |
| i4_threshold_divider = 1; |
| } |
| |
| /*************************************************************************/ |
| /*************************************************************************/ |
| /*************************************************************************/ |
| /* START OF THE CORE LOOP */ |
| /* If Encode is 0, then we just loop over each blk */ |
| /*************************************************************************/ |
| /*************************************************************************/ |
| /*************************************************************************/ |
| while(0 == end_of_frame) |
| { |
| job_queue_t *ps_job; |
| ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID |
| WORD32 i4_ctb_row_ctr; //counter to calculate CTB row counter. It's (row_ctr /4) |
| WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only |
| //+3 to get ceil values when divided by 4 |
| WORD32 i4_num_4x4_blocks_in_ctb_at_l1 = |
| 8 * 8; //considering CTB size 32x32 at L1. hardcoded for now |
| //if there is variable for ctb size use that and this variable can be derived |
| WORD32 offset_val, check_dep_pos, set_dep_pos; |
| void *pv_hme_dep_mngr; |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row; |
| |
| /* Get the current layer HME Dep Mngr */ |
| /* Note : Use layer_id - 1 in HME layers */ |
| |
| pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1]; |
| |
| /* Get the current row from the job queue */ |
| ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( |
| ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong); |
| |
| /* If all rows are done, set the end of process flag to 1, */ |
| /* and the current row to -1 */ |
| if(NULL == ps_job) |
| { |
| blk_y = -1; |
| end_of_frame = 1; |
| |
| continue; |
| } |
| |
| if(1 == ps_ctxt->s_frm_prms.is_i_pic) |
| { |
| /* set the output dependency of current row */ |
| ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); |
| continue; |
| } |
| |
| blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; |
| blk_x = 0; |
| i4_ctb_x = 0; |
| |
| /* wait for Corresponding Pre intra Job to be completed */ |
| if(1 == ps_refine_prms->i4_layer_id) |
| { |
| volatile UWORD32 i4_l1_done; |
| volatile UWORD32 *pi4_l1_done; |
| pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt |
| ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2]; |
| i4_l1_done = *pi4_l1_done; |
| while(!i4_l1_done) |
| { |
| i4_l1_done = *pi4_l1_done; |
| } |
| } |
| /* Set Variables for Dep. Checking and Setting */ |
| set_dep_pos = blk_y + 1; |
| if(blk_y > 0) |
| { |
| offset_val = 2; |
| check_dep_pos = blk_y - 1; |
| } |
| else |
| { |
| /* First row should run without waiting */ |
| offset_val = -1; |
| check_dep_pos = 0; |
| } |
| |
| /* EIID: calculate ed_blk_ctxt pointer for current row */ |
        /* valid only for layer-1; not verified for use in other layers */
| i4_ctb_row_ctr = blk_y / 4; |
| ps_ed_blk_ctxt_curr_row = |
| ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row * |
| i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only |
| ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row); |
| |
| /* if non-encode layer then i4_ctb_x will be same as blk_x */ |
        /* loop over all the units in a row */
| for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++) |
| { |
| ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIDD |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr; |
| WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4; |
| |
| /* Wait till top row block is processed */ |
| /* Currently checking till top right block*/ |
| |
| /* Disabled since all candidates, except for */ |
| /* L and C, are projected from the coarser layer, */ |
| /* only in ME_HIGH_SPEED mode */ |
| if((ME_MEDIUM_SPEED > e_me_quality_presets)) |
| { |
| if(i4_ctb_x < (num_sync_units_in_row - 1)) |
| { |
| ihevce_dmgr_chk_row_row_sync( |
| pv_hme_dep_mngr, |
| i4_ctb_x, |
| offset_val, |
| check_dep_pos, |
| 0, /* Col Tile No. : Not supported in PreEnc*/ |
| ps_ctxt->thrd_id); |
| } |
| } |
| |
| { |
| /* for non encoder layer only one block is processed */ |
| num_blks_in_this_ctb = 1; |
| } |
| |
| /* EIID: derive ed_ctxt ptr for current CTB */ |
| ps_ed_blk_ctxt_curr_ctb = |
| ps_ed_blk_ctxt_curr_row + |
| (i4_ctb_blk_ctr * |
| i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only |
| ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr; |
| |
| /* loop over all the blocks in CTB will always be 1 */ |
| for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++) |
| { |
| { |
| /* non encode layer */ |
| blk_x = i4_ctb_x; |
| blk_id_in_full_ctb = 0; |
| s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0; |
| } |
| |
| /* get the current input blk point */ |
| pos_x = blk_x << blk_size_shift; |
| pos_y = blk_y << blk_size_shift; |
| pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride); |
| |
| /*********************************************************************/ |
| /* replicate the inp buffer at blk or ctb level for each ref id, */ |
| /* Instead of searching with wk * ref(k), we search with Ik = I / wk */ |
| /* thereby avoiding a bloat up of memory. If we did all references */ |
| /* weighted pred, we will end up with a duplicate copy of each ref */ |
| /* at each layer, since we need to preserve the original reference. */ |
| /* ToDo: Need to observe performance with this mechanism and compare */ |
| /* with case where ref is weighted. */ |
| /*********************************************************************/ |
| if(blk_id_in_ctb == 0) |
| { |
| fp_get_wt_inp( |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| unit_size, |
| pos_x, |
| pos_y, |
| unit_size, |
| ps_ctxt->num_ref_future + ps_ctxt->num_ref_past, |
| ps_ctxt->i4_wt_pred_enable_flag); |
| } |
| |
| s_search_prms_blk.i4_x_off = blk_x << blk_size_shift; |
| s_search_prms_blk.i4_y_off = blk_y << blk_size_shift; |
| /* Select search results from a suitable search result in the context */ |
| { |
| ps_search_results = &ps_ctxt->s_search_results_8x8; |
| } |
| |
| s_search_prms_blk.ps_search_results = ps_search_results; |
| |
| /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ |
| hme_reset_search_results( |
| ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL); |
| |
| /* Loop across different Ref IDx */ |
| for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++) |
| { |
| S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12; |
| S32 prev_blk_offset = 6; |
| S32 resultid; |
| |
| /*********************************************************************/ |
| /* For every blk in the picture, the search range needs to be derived*/ |
| /* Any blk can have any mv, but practical search constraints are */ |
| /* imposed by the picture boundary and amt of padding. */ |
| /*********************************************************************/ |
| /* MV limit is different based on ref. PIC */ |
| hme_derive_search_range( |
| &s_range_prms_inp, |
| &s_pic_limit_inp, |
| &as_mv_limit[i1_ref_idx], |
| pos_x, |
| pos_y, |
| blk_wd, |
| blk_ht); |
| hme_derive_search_range( |
| &s_range_prms_rec, |
| &s_pic_limit_rec, |
| &as_mv_limit[i1_ref_idx], |
| pos_x, |
| pos_y, |
| blk_wd, |
| blk_ht); |
| |
| s_search_prms_blk.i1_ref_idx = i1_ref_idx; |
| ps_candt_zeromv->i1_ref_idx = i1_ref_idx; |
| |
| i4_num_srch_cands = 1; |
| |
| if(1 != ps_refine_prms->i4_layer_id) |
| { |
| S32 x, y; |
| x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; |
| y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; |
| |
| if(ME_MEDIUM_SPEED > e_me_quality_presets) |
| { |
| hme_get_spatial_candt( |
| ps_curr_layer, |
| e_search_blk_size, |
| blk_x, |
| blk_y, |
| i1_ref_idx, |
| &as_top_neighbours[0], |
| &as_left_neighbours[0], |
| 0, |
| ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1), |
| 0, |
| ps_refine_prms->i4_encode); |
| |
| *ps_candt_tr = as_top_neighbours[3]; |
| *ps_candt_t = as_top_neighbours[1]; |
| *ps_candt_tl = as_top_neighbours[0]; |
| i4_num_srch_cands += 3; |
| } |
| else |
| { |
| layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; |
| S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; |
| S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; |
| search_node_t *ps_search_node; |
| S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y; |
| hme_mv_t *ps_mv, *ps_mv_base; |
| S08 *pi1_ref_idx, *pi1_ref_idx_base; |
| S32 jump = 1, mvs_in_blk, mvs_in_row; |
| S32 shift = (ps_refine_prms->i4_encode ? 2 : 0); |
| |
| if(i4_blk_size1 != i4_blk_size2) |
| { |
| blk_x_temp <<= 1; |
| blk_y_temp <<= 1; |
| jump = 2; |
| if((i4_blk_size1 << 2) == i4_blk_size2) |
| { |
| blk_x_temp <<= 1; |
| blk_y_temp <<= 1; |
| jump = 4; |
| } |
| } |
| |
| mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; |
| mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; |
| |
                            /* Adjust the blk coord to point to top left locn */
| blk_x_temp -= 1; |
| blk_y_temp -= 1; |
| |
| /* Pick up the mvs from the location */ |
| i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk); |
| i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp); |
| |
| ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
| pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
| |
| ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref); |
| pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref); |
| |
| ps_mv_base = ps_mv; |
| pi1_ref_idx_base = pi1_ref_idx; |
| |
| ps_search_node = &as_left_neighbours[0]; |
| ps_mv = ps_mv_base + mvs_in_row; |
| pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; |
| COPY_MV_TO_SEARCH_NODE( |
| ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
| |
| i4_num_srch_cands++; |
| } |
| } |
| else |
| { |
| S32 x, y; |
| x = gau1_encode_to_raster_x[blk_id_in_full_ctb]; |
| y = gau1_encode_to_raster_y[blk_id_in_full_ctb]; |
| |
| if(ME_MEDIUM_SPEED > e_me_quality_presets) |
| { |
| hme_get_spatial_candt_in_l1_me( |
| ps_curr_layer, |
| e_search_blk_size, |
| blk_x, |
| blk_y, |
| i1_ref_idx, |
| !ps_search_results->pu1_is_past[i1_ref_idx], |
| &as_top_neighbours[0], |
| &as_left_neighbours[0], |
| 0, |
| ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1), |
| 0, |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0, |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l1); |
| |
| *ps_candt_tr = as_top_neighbours[3]; |
| *ps_candt_t = as_top_neighbours[1]; |
| *ps_candt_tl = as_top_neighbours[0]; |
| |
| i4_num_srch_cands += 3; |
| } |
| else |
| { |
| layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; |
| S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; |
| S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; |
| S32 i4_mv_pos_in_implicit_array; |
| search_node_t *ps_search_node; |
| S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y; |
| hme_mv_t *ps_mv, *ps_mv_base; |
| S08 *pi1_ref_idx, *pi1_ref_idx_base; |
| S32 jump = 1, mvs_in_blk, mvs_in_row; |
| S32 shift = (ps_refine_prms->i4_encode ? 2 : 0); |
| U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx]; |
| S32 i4_num_results_in_given_dir = |
| ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l1) |
| : (ps_layer_mvbank->i4_num_mvs_per_ref * |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0)); |
| |
| if(i4_blk_size1 != i4_blk_size2) |
| { |
| blk_x_temp <<= 1; |
| blk_y_temp <<= 1; |
| jump = 2; |
| if((i4_blk_size1 << 2) == i4_blk_size2) |
| { |
| blk_x_temp <<= 1; |
| blk_y_temp <<= 1; |
| jump = 4; |
| } |
| } |
| |
| mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; |
| mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; |
| |
                            /* Adjust the blk coord to point to top left locn */
| blk_x_temp -= 1; |
| blk_y_temp -= 1; |
| |
| /* Pick up the mvs from the location */ |
| i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk); |
| i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp); |
| |
| i4_offset += |
| ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * |
| ps_ctxt->s_frm_prms.u1_num_active_ref_l0) |
| : 0); |
| |
| ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
| pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
| |
| ps_mv_base = ps_mv; |
| pi1_ref_idx_base = pi1_ref_idx; |
| |
| { |
| /* ps_mv and pi1_ref_idx now point to the top left locn */ |
| ps_search_node = &as_left_neighbours[0]; |
| ps_mv = ps_mv_base + mvs_in_row; |
| pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; |
| |
| i4_mv_pos_in_implicit_array = |
| hme_find_pos_of_implicitly_stored_ref_id( |
| pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir); |
| |
| if(-1 != i4_mv_pos_in_implicit_array) |
| { |
| COPY_MV_TO_SEARCH_NODE( |
| ps_search_node, |
| &ps_mv[i4_mv_pos_in_implicit_array], |
| &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
| i1_ref_idx, |
| shift); |
| } |
| else |
| { |
| ps_search_node->u1_is_avail = 0; |
| ps_search_node->s_mv.i2_mvx = 0; |
| ps_search_node->s_mv.i2_mvy = 0; |
| ps_search_node->i1_ref_idx = i1_ref_idx; |
| } |
| |
| i4_num_srch_cands++; |
| } |
| } |
| } |
| |
| *ps_candt_l = as_left_neighbours[0]; |
| |
| /* when 16x16 is searched in an encode layer, and the prev layer */ |
| /* stores results for 4x4 blks, we project 5 candts corresponding */ |
| /* to (2,2), (2,14), (14,2), 14,14) and 2nd best of (2,2) */ |
| /* However in other cases, only 2,2 best and 2nd best reqd */ |
| resultid = 0; |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_coloc[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + 2, |
| pos_y + 2, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands++; |
| |
| resultid = 1; |
| if(num_results_prev_layer > 1) |
| { |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_coloc[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + 2, |
| pos_y + 2, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands++; |
| } |
| |
| resultid = 0; |
| |
| if(ME_MEDIUM_SPEED <= e_me_quality_presets) |
| { |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_t[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x, |
| pos_y - prev_blk_offset, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands++; |
| } |
| |
| { |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_br[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + next_blk_offset, |
| pos_y + next_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_bl[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x - prev_blk_offset, |
| pos_y + next_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_r[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + next_blk_offset, |
| pos_y, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_b[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x, |
| pos_y + next_blk_offset, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands += 4; |
| |
| if(ME_MEDIUM_SPEED <= e_me_quality_presets) |
| { |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_tr[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + next_blk_offset, |
| pos_y - prev_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_tl[0], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x - prev_blk_offset, |
| pos_y - prev_blk_offset, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands += 2; |
| } |
| } |
| if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED)) |
| { |
| resultid = 1; |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_br[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + next_blk_offset, |
| pos_y + next_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_bl[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x - prev_blk_offset, |
| pos_y + next_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_r[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + next_blk_offset, |
| pos_y, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_b[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x, |
| pos_y + next_blk_offset, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands += 4; |
| |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_tr[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x + next_blk_offset, |
| pos_y - prev_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_tl[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x - prev_blk_offset, |
| pos_y - prev_blk_offset, |
| i1_ref_idx, |
| resultid); |
| pf_hme_project_coloc_candt( |
| ps_candt_prj_t[1], |
| ps_curr_layer, |
| ps_coarse_layer, |
| pos_x, |
| pos_y - prev_blk_offset, |
| i1_ref_idx, |
| resultid); |
| |
| i4_num_srch_cands += 3; |
| } |
| |
| /* Note this block also clips the MV range for all candidates */ |
| #ifdef _DEBUG |
| { |
| S32 candt; |
| range_prms_t *ps_range_prms; |
| |
| S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past; |
| for(candt = 0; candt < i4_num_srch_cands; candt++) |
| { |
| search_node_t *ps_search_node; |
| |
| ps_search_node = |
| s_search_prms_blk.ps_search_candts[candt].ps_search_node; |
| |
| ps_range_prms = s_search_prms_blk.aps_mv_range[0]; |
| |
| if((ps_search_node->i1_ref_idx >= num_ref_valid) || |
| (ps_search_node->i1_ref_idx < 0)) |
| { |
| ASSERT(0); |
| } |
| } |
| } |
| #endif |
| |
| { |
| S32 srch_cand; |
| S32 num_unique_nodes = 0; |
| S32 num_nodes_searched = 0; |
| S32 num_best_cand = 0; |
| S08 i1_grid_enable = 0; |
| search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2]; |
| /* has list of valid partition to search terminated by -1 */ |
| S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; |
| S32 center_x; |
| S32 center_y; |
| |
| /* indicates if the centre point of grid needs to be explicitly added for search */ |
| S32 add_centre = 0; |
| |
| memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map)); |
| center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx; |
| center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy; |
| |
| for(srch_cand = 0; |
| (srch_cand < i4_num_srch_cands) && |
| (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts); |
| srch_cand++) |
| { |
| search_node_t s_search_node_temp = |
| s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0]; |
| |
| s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX; |
| |
| /* Clip the motion vectors as well here since after clipping |
| two candidates can become same and they will be removed during deduplication */ |
| CLIP_MV_WITHIN_RANGE( |
| s_search_node_temp.s_mv.i2_mvx, |
| s_search_node_temp.s_mv.i2_mvy, |
| s_search_prms_blk.aps_mv_range[0], |
| ps_refine_prms->i4_num_steps_fpel_refine, |
| ps_refine_prms->i4_num_steps_hpel_refine, |
| ps_refine_prms->i4_num_steps_qpel_refine); |
| |
| /* PT_C */ |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| num_nodes_searched += 1; |
| } |
| num_unique_nodes = |
| MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts); |
| |
| /* If number of candidates projected/number of candidates to be refined are more than 2, |
| then filter out and choose the best two here */ |
| if(num_unique_nodes >= 2) |
| { |
| S32 num_results; |
| S32 cnt; |
| S32 *pi4_valid_part_ids; |
| s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; |
| s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; |
| pi4_valid_part_ids = &ai4_valid_part_ids[0]; |
| |
| /* pi4_valid_part_ids is updated inside */ |
| hme_pred_search_no_encode( |
| &s_search_prms_blk, |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| pi4_valid_part_ids, |
| 1, |
| e_me_quality_presets, |
| i1_grid_enable, |
| (ihevce_me_optimised_function_list_t *) |
| ps_ctxt->pv_me_optimised_function_list |
| |
| ); |
| |
| num_best_cand = 0; |
| cnt = 0; |
| num_results = ps_search_results->u1_num_results_per_part; |
| |
| while((id = pi4_valid_part_ids[cnt++]) >= 0) |
| { |
| num_results = |
| MIN(ps_refine_prms->pu1_num_best_results[id], num_results); |
| |
| for(i = 0; i < num_results; i++) |
| { |
| search_node_t s_search_node_temp; |
| s_search_node_temp = |
| *(ps_search_results->aps_part_results[i1_ref_idx][id] + i); |
| if(s_search_node_temp.i1_ref_idx >= 0) |
| { |
| INSERT_NEW_NODE_NOMAP( |
| as_best_two_proj_node, |
| num_best_cand, |
| s_search_node_temp, |
| 0); |
| } |
| } |
| } |
| } |
| else |
| { |
| add_centre = 1; |
| num_best_cand = num_unique_nodes; |
| as_best_two_proj_node[0] = as_unique_search_nodes[0]; |
| } |
| |
| num_unique_nodes = 0; |
| num_nodes_searched = 0; |
| |
| if(1 == num_best_cand) |
| { |
| search_node_t s_search_node_temp = as_best_two_proj_node[0]; |
| S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx; |
| S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy; |
| S08 i1_ref_idx = s_search_node_temp.i1_ref_idx; |
| |
| i1_grid_enable = 1; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| |
| if(add_centre) |
| { |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x; |
| as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y; |
| as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx; |
| } |
| } |
| else |
| { |
| /* For the candidates where refinement was required, choose the best two */ |
| for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++) |
| { |
| search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand]; |
| WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx; |
| WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy; |
| |
| /* Because there may not be two best unique candidates (because of clipping), |
| second best candidate can be uninitialized, ignore that */ |
| if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV || |
| s_search_node_temp.i1_ref_idx < 0) |
| { |
| num_nodes_searched++; |
| continue; |
| } |
| |
| /* PT_C */ |
| /* Since the center point has already be evaluated and best results are persistent, |
| it will not be evaluated again */ |
| if(add_centre) /* centre point added explicitly again if search results is not updated */ |
| { |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| } |
| |
| /* PT_L */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x - 1; |
| s_search_node_temp.s_mv.i2_mvy = mv_y; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_T */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x; |
| s_search_node_temp.s_mv.i2_mvy = mv_y - 1; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_R */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x + 1; |
| s_search_node_temp.s_mv.i2_mvy = mv_y; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_B */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x; |
| s_search_node_temp.s_mv.i2_mvy = mv_y + 1; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_TL */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x - 1; |
| s_search_node_temp.s_mv.i2_mvy = mv_y - 1; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_TR */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x + 1; |
| s_search_node_temp.s_mv.i2_mvy = mv_y - 1; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_BL */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x - 1; |
| s_search_node_temp.s_mv.i2_mvy = mv_y + 1; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| |
| /* PT_BR */ |
| s_search_node_temp.s_mv.i2_mvx = mv_x + 1; |
| s_search_node_temp.s_mv.i2_mvy = mv_y + 1; |
| INSERT_NEW_NODE( |
| as_unique_search_nodes, |
| num_unique_nodes, |
| s_search_node_temp, |
| 0, |
| au4_unique_node_map, |
| center_x, |
| center_y, |
| 1); |
| } |
| } |
| |
| s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0]; |
| s_search_prms_blk.i4_num_search_nodes = num_unique_nodes; |
| |
| /*****************************************************************/ |
| /* Call the search algorithm, this includes: */ |
| /* Pre-Search-Refinement (for coarse candts) */ |
| /* Search on each candidate */ |
| /* Post Search Refinement on winners/other new candidates */ |
| /*****************************************************************/ |
| |
| hme_pred_search_no_encode( |
| &s_search_prms_blk, |
| ps_curr_layer, |
| &ps_ctxt->s_wt_pred, |
| ai4_valid_part_ids, |
| 0, |
| e_me_quality_presets, |
| i1_grid_enable, |
| (ihevce_me_optimised_function_list_t *) |
| ps_ctxt->pv_me_optimised_function_list); |
| |
| i1_grid_enable = 0; |
| } |
| } |
| |
| /* for non encode layer update MV and end processing for block */ |
| { |
| WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0; |
| search_node_t *ps_search_node; |
| /* now update the reqd results back to the layer mv bank. */ |
| if(1 == ps_refine_prms->i4_layer_id) |
| { |
| hme_update_mv_bank_in_l1_me( |
| ps_search_results, |
| ps_curr_layer->ps_layer_mvbank, |
| blk_x, |
| blk_y, |
| &s_mv_update_prms); |
| } |
| else |
| { |
| hme_update_mv_bank_noencode( |
| ps_search_results, |
| ps_curr_layer->ps_layer_mvbank, |
| blk_x, |
| blk_y, |
| &s_mv_update_prms); |
| } |
| |
| /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| WORD32 i4_j; |
| layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank; |
| |
| //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size) |
| /* Not considering this for Dyn. Search Update */ |
| { |
| for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref; |
| i4_ref_id++) |
| { |
| ps_search_node = |
| ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; |
| |
| for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++) |
| { |
| hme_update_dynamic_search_params( |
| &ps_ctxt->s_coarse_dyn_range_prms |
| .as_dyn_range_prms[ps_refine_prms->i4_layer_id] |
| [i4_ref_id], |
| ps_search_node->s_mv.i2_mvy); |
| |
| ps_search_node++; |
| } |
| } |
| } |
| } |
| |
| if(1 == ps_refine_prms->i4_layer_id) |
| { |
| WORD32 wt_pred_val, log_wt_pred_val; |
| WORD32 ref_id_of_nearest_poc = 0; |
| WORD32 max_val = 0x7fffffff; |
| WORD32 max_l0_val = 0x7fffffff; |
| WORD32 max_l1_val = 0x7fffffff; |
| WORD32 cur_val; |
| WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred; |
| |
| WORD32 bestl0_sad = 0x7fffffff; |
| WORD32 bestl1_sad = 0x7fffffff; |
| search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL; |
| |
| for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref; |
| i4_ref_id++) |
| { |
| wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id]; |
| log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc; |
| |
| ps_search_node = |
| ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N]; |
| |
| i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) + |
| ((1 << log_wt_pred_val) >> 1)) >> |
| log_wt_pred_val; |
| |
| i4_local_cost_weighted_pred = |
| i4_local_weighted_sad + |
| (ps_search_node->i4_tot_cost - ps_search_node->i4_sad); |
| //the loop is redundant as the results are already sorted based on total cost |
| //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++) |
| { |
| if(i4_local_cost_weighted_pred < min_cost) |
| { |
| min_cost = i4_local_cost_weighted_pred; |
| min_sad = i4_local_weighted_sad; |
| } |
| } |
| |
| /* For P frame, calculate the nearest poc which is either P or I frame*/ |
| if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]) |
| { |
| cur_val = |
| ABS(ps_ctxt->i4_curr_poc - |
| ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]); |
| if(cur_val < max_val) |
| { |
| max_val = cur_val; |
| ref_id_of_nearest_poc = i4_ref_id; |
| } |
| } |
| } |
| } |
| /*Store me cost wrt. to past frame only for P frame */ |
| if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
| { |
| if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc]) |
| { |
| WORD16 i2_mvx, i2_mvy; |
| |
| WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); |
| WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); |
| WORD32 z_scan_idx = |
| gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; |
| WORD32 wt, log_wt; |
| |
| /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc]) |
| <= (1 + ps_ctxt->num_b_frms));*/ |
| |
| /*obtain mvx and mvy */ |
| i2_mvx = |
| ps_search_results |
| ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] |
| ->s_mv.i2_mvx; |
| i2_mvy = |
| ps_search_results |
| ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] |
| ->s_mv.i2_mvy; |
| |
| /*register the min cost for l1 me in blk context */ |
| wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc]; |
| log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc; |
| |
| /*register the min cost for l1 me in blk context */ |
| ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] = |
| ((ps_search_results |
| ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] |
| ->i4_sad * |
| wt) + |
| ((1 << log_wt) >> 1)) >> |
| log_wt; |
| ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] = |
| ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] + |
| (ps_search_results |
| ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] |
| ->i4_tot_cost - |
| ps_search_results |
| ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N] |
| ->i4_sad); |
| /*for complexity change detection*/ |
| ps_ctxt->i4_num_blks++; |
| if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] > |
| (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms))) |
| { |
| ps_ctxt->i4_num_blks_high_sad++; |
| } |
| } |
| } |
| } |
| |
| /* EIID: Early inter intra decisions */ |
| /* tap L1 level SAD for inter intra decisions */ |
| if((e_me_quality_presets >= ME_MEDIUM_SPEED) && |
| (!ps_ctxt->s_frm_prms |
| .is_i_pic)) //for high-quality preset->disable early decisions |
| { |
| if(1 == ps_refine_prms->i4_layer_id) |
| { |
| WORD32 i4_min_sad_cost_8x8_block = min_cost; |
| ihevce_ed_blk_t *ps_curr_ed_blk_ctxt; |
| WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); |
| WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); |
| WORD32 z_scan_idx = |
| gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; |
| ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx; |
| |
| /*register the min cost for l1 me in blk context */ |
| ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] = |
| i4_min_sad_cost_8x8_block; |
| i4_num_comparisions++; |
| |
| /* take early inter-intra decision here */ |
| ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */ |
| #if DISABLE_INTRA_IN_BPICS |
| if((e_me_quality_presets == ME_XTREME_SPEED_25) && |
| (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)) |
| { |
| ps_curr_ed_blk_ctxt->intra_or_inter = |
| 2; /*eval only inter if inter cost is less */ |
| i4_num_inter_wins++; |
| } |
| else |
| #endif |
| { |
| if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] < |
| ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] * |
| i4_threshold_multiplier) / |
| i4_threshold_divider)) |
| { |
| ps_curr_ed_blk_ctxt->intra_or_inter = |
| 2; /*eval only inter if inter cost is less */ |
| i4_num_inter_wins++; |
| } |
| } |
| |
| //{ |
| // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n", |
| // blk_x,blk_y, |
| // i4_ctb_blk_ctr, i4_ctb_row_ctr, |
| // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe, |
| // i4_min_sad_cost_8x8_block |
| // ); |
| //} |
| |
| } //end of layer-1 |
| } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED) |
| else |
| { |
| if(1 == ps_refine_prms->i4_layer_id) |
| { |
| WORD32 i4_min_sad_cost_8x8_block = min_cost; |
| WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); |
| WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); |
| WORD32 z_scan_idx = |
| gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; |
| |
| /*register the min cost for l1 me in blk context */ |
| ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] = |
| i4_min_sad_cost_8x8_block; |
| } |
| } |
| if(1 == ps_refine_prms->i4_layer_id) |
| { |
| WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4); |
| WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4); |
| WORD32 z_scan_idx = |
| gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr]; |
| |
| ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] = |
| min_sad; |
| |
| if(min_cost < |
| ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2]) |
| { |
| ps_ctxt->i4_L1_hme_best_cost += min_cost; |
| ps_ctxt->i4_L1_hme_sad += min_sad; |
| ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad; |
| } |
| else |
| { |
| ps_ctxt->i4_L1_hme_best_cost += |
| ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2]; |
| ps_ctxt->i4_L1_hme_sad += |
| ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2]; |
| ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = |
| ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2]; |
| } |
| } |
| } |
| } |
| |
| /* Update the number of blocks processed in the current row */ |
| if((ME_MEDIUM_SPEED > e_me_quality_presets)) |
| { |
| ihevce_dmgr_set_row_row_sync( |
| pv_hme_dep_mngr, |
| (i4_ctb_x + 1), |
| blk_y, |
| 0 /* Col Tile No. : Not supported in PreEnc*/); |
| } |
| } |
| |
| /* set the output dependency after completion of row */ |
| ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); |
| } |
| } |