| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| |
| /*! |
| ****************************************************************************** |
| * \file ihevce_decomp_pre_intra_pass.c |
| * |
| * \brief |
| * This file contains definitions related to frame decomposition done during |
| * pre intra processing |
| * |
| * \date |
| * 19/02/2013 |
| * |
| * \author |
| * Ittiam |
| * |
| * List of Functions |
| * ihevce_intra_populate_mode_bits_cost() |
| * ihevce_8x8_sad_computer() |
| * ihevce_4x4_sad_computer() |
| * ihevce_ed_4x4_find_best_modes() |
| * ihevce_ed_calc_4x4_blk() |
| * ihevce_ed_calc_8x8_blk() |
| * ihevce_ed_calc_incomplete_ctb() |
| * ihevce_cu_level_qp_mod() |
| * ihevce_ed_calc_ctb() |
| * ihevce_ed_frame_init() |
| * ihevce_scale_by_2() |
| * ihevce_decomp_pre_intra_process_row() |
| * ihevce_decomp_pre_intra_process() |
| * ihevce_decomp_pre_intra_get_num_mem_recs() |
| * ihevce_decomp_pre_intra_get_mem_recs() |
| * ihevce_decomp_pre_intra_init() |
| * ihevce_decomp_pre_intra_frame_init() |
| * ihevce_merge_sort() |
| * ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit() |
| * |
| ****************************************************************************** |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <stdint.h> |
| #include <math.h> |
| #include <limits.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_debug.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_hle_interface.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_multi_thrd_funcs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_ipe_instr_set_router.h" |
| #include "ihevce_decomp_pre_intra_structs.h" |
| #include "ihevce_decomp_pre_intra_pass.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "hme_datatype.h" |
| #include "hme_interface.h" |
| #include "hme_common_defs.h" |
| #include "ihevce_global_tables.h" |
| |
| /*****************************************************************************/ |
| /* Global variables */ |
| /*****************************************************************************/ |
| |
| /** |
| ***************************************************************************** |
| * @brief subset of intra modes to be evaluated during pre enc intra process |
| ***************************************************************************** |
| */ |
| static const UWORD8 gau1_modes_to_eval[11] = { 0, 1, 26, 2, 6, 10, 14, 18, 22, 30, 34 }; |
| |
| /** |
| ***************************************************************************** |
| * @brief list of pointers to luma intra pred functions |
| ***************************************************************************** |
| */ |
| pf_intra_pred g_apf_lum_ip[NUM_IP_FUNCS]; |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_intra_populate_mode_bits_cost \endif |
| * |
| * \brief: look-up table of cost of signalling an intra mode in the |
| * bitstream |
| * |
| ***************************************************************************** |
| */ |
| static void ihevce_intra_populate_mode_bits_cost(UWORD16 *mode_bits_cost, WORD32 lambda) |
| { |
| WORD32 i; |
| // 5.5 * lambda |
| UWORD16 five_bits_cost = COMPUTE_RATE_COST_CLIP30(11, lambda, (LAMBDA_Q_SHIFT + 1)); |
| |
| for(i = 0; i < NUM_MODES; i++) |
| { |
| mode_bits_cost[i] = five_bits_cost; |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_8x8_sad_computer \endif |
| * |
| * \brief: compute sad between 2 8x8 blocks |
| * |
| ***************************************************************************** |
| */ |
| UWORD16 ihevce_8x8_sad_computer(UWORD8 *src, UWORD8 *pred, WORD32 src_strd, WORD32 pred_strd) |
| { |
| UWORD16 sad = 0; |
| WORD32 i, j; |
| |
| for(i = 0; i < 8; i++) |
| { |
| for(j = 0; j < 8; j++) |
| { |
| sad += ABS(src[j] - pred[j]); |
| } |
| src += src_strd; |
| pred += pred_strd; |
| } |
| |
| return sad; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_4x4_sad_computer \endif |
| * |
| * \brief: compute sad between 2 4x4 blocks |
| * |
| ***************************************************************************** |
| */ |
| UWORD16 ihevce_4x4_sad_computer(UWORD8 *src, UWORD8 *pred, WORD32 src_strd, WORD32 pred_strd) |
| { |
| UWORD16 sad = 0; |
| WORD32 i, j; |
| |
| for(i = 0; i < 4; i++) |
| { |
| for(j = 0; j < 4; j++) |
| { |
| sad += ABS(src[j] - pred[j]); |
| } |
| src += src_strd; |
| pred += pred_strd; |
| } |
| |
| return sad; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_ed_4x4_find_best_modes \endif |
| * |
| * \brief: evaluate input 4x4 block for pre-selected list intra modes and |
| * return best sad, cost |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_ed_4x4_find_best_modes( |
| UWORD8 *pu1_src, |
| WORD32 src_stride, |
| UWORD8 *ref, |
| UWORD16 *mode_bits_cost, |
| UWORD8 *pu1_best_modes, |
| WORD32 *pu1_best_sad_costs, |
| WORD32 u1_low_resol, |
| FT_SAD_COMPUTER *pf_4x4_sad_computer) |
| { |
| WORD32 i; |
| UWORD8 mode = 0, best_amode = 0, best_nmode = 0; |
| UWORD8 pred[16]; |
| WORD32 sad = 0; |
| WORD32 sad_cost = 0; |
| WORD32 best_asad_cost = 0xFFFFF; |
| WORD32 best_nsad_cost = 0xFFFFF; |
| |
| /* If lower layers, l1 or l2, all the 11 modes are evaluated */ |
| /* If L0 layer, all modes excluding DC and Planar are evaluated */ |
| if(1 == u1_low_resol) |
| i = 0; |
| else |
| i = 2; |
| |
| /* Find the best non-angular and angular mode till level 4 */ |
| for(; i < 11; i++) |
| { |
| mode = gau1_modes_to_eval[i]; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
| sad = pf_4x4_sad_computer(pu1_src, pred, src_stride, 4); |
| sad_cost = sad + mode_bits_cost[mode]; |
| if(mode < 2) |
| { |
| if(sad_cost < best_nsad_cost) |
| { |
| best_nmode = mode; |
| best_nsad_cost = sad_cost; |
| } |
| } |
| else |
| { |
| if(sad_cost < best_asad_cost) |
| { |
| best_amode = mode; |
| best_asad_cost = sad_cost; |
| } |
| } |
| } |
| |
| pu1_best_modes[0] = best_amode; |
| pu1_best_sad_costs[0] = best_asad_cost; |
| |
| if(1 == u1_low_resol) |
| { |
| pu1_best_modes[1] = best_nmode; |
| pu1_best_sad_costs[1] = best_nsad_cost; |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_ed_calc_4x4_blk \endif |
| * |
| * \brief: evaluate input 4x4 block for all intra modes and return best sad & |
| * cost |
| * |
| ***************************************************************************** |
| */ |
| static void ihevce_ed_calc_4x4_blk( |
| ihevce_ed_blk_t *ps_ed, |
| UWORD8 *pu1_src, |
| WORD32 src_stride, |
| UWORD8 *ref, |
| UWORD16 *mode_bits_cost, |
| WORD32 *pi4_best_satd, |
| WORD32 i4_quality_preset, |
| WORD32 *pi4_best_sad_cost, |
| ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list) |
| { |
| WORD32 i, i_end; |
| UWORD8 mode, best_amode, best_nmode; |
| UWORD8 pred[16]; |
| UWORD16 sad; |
| WORD32 sad_cost = 0; |
| WORD32 best_asad_cost = 0xFFFFF; |
| WORD32 best_nsad_cost = 0xFFFFF; |
| UWORD8 au1_best_modes[2]; |
| WORD32 ai4_best_sad_costs[2]; |
| /* L1/L2 resolution hence low resolution enable */ |
| const WORD32 u1_low_resol = 1; |
| UWORD8 modes_to_eval[2]; |
| |
| ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes( |
| pu1_src, |
| src_stride, |
| ref, |
| mode_bits_cost, |
| au1_best_modes, |
| ai4_best_sad_costs, |
| u1_low_resol, |
| ps_ipe_optimised_function_list->pf_4x4_sad_computer); |
| |
| best_nmode = au1_best_modes[1]; |
| best_amode = au1_best_modes[0]; |
| best_nsad_cost = ai4_best_sad_costs[1]; |
| best_asad_cost = ai4_best_sad_costs[0]; |
| *pi4_best_satd = best_asad_cost - mode_bits_cost[best_amode]; |
| |
| /* Around best level 4 angular mode, search for best level 2 mode */ |
| modes_to_eval[0] = best_amode - 2; |
| modes_to_eval[1] = best_amode + 2; |
| i = 0; |
| i_end = 2; |
| if(best_amode == 2) |
| i = 1; |
| else if(best_amode == 34) |
| i_end = 1; |
| for(; i < i_end; i++) |
| { |
| mode = modes_to_eval[i]; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
| sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, pred, src_stride, 4); |
| sad_cost = sad + mode_bits_cost[mode]; |
| if(sad_cost < best_asad_cost) |
| { |
| best_amode = mode; |
| best_asad_cost = sad_cost; |
| *pi4_best_satd = sad; |
| } |
| } |
| |
| if(i4_quality_preset < IHEVCE_QUALITY_P4) |
| { |
| /* Around best level 2 angular mode, search for best level 1 mode */ |
| modes_to_eval[0] = best_amode - 1; |
| modes_to_eval[1] = best_amode + 1; |
| i = 0; |
| i_end = 2; |
| if(best_amode == 2) |
| i = 1; |
| else if(best_amode == 34) |
| i_end = 1; |
| for(; i < i_end; i++) |
| { |
| mode = modes_to_eval[i]; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
| sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, pred, src_stride, 4); |
| sad_cost = sad + mode_bits_cost[mode]; |
| if(sad_cost < best_asad_cost) |
| { |
| best_amode = mode; |
| best_asad_cost = sad_cost; |
| *pi4_best_satd = sad; |
| } |
| } |
| } |
| |
| if(best_asad_cost < best_nsad_cost) |
| { |
| ps_ed->best_mode = best_amode; |
| *pi4_best_sad_cost = best_asad_cost; |
| } |
| else |
| { |
| ps_ed->best_mode = best_nmode; |
| *pi4_best_sad_cost = best_nsad_cost; |
| } |
| ps_ed->intra_or_inter = 0; |
| ps_ed->merge_success = 0; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_ed_calc_8x8_blk \endif |
| * |
| * \brief: evaluate input 8x8 block for intra modes basing on the intra mode |
| * decisions made at 4x4 level. This function also makes a decision whether |
| * to split blk in to 4x4 partitions or not. |
| * |
| ***************************************************************************** |
| */ |
| static void ihevce_ed_calc_8x8_blk( |
| ihevce_ed_ctxt_t *ps_ed_ctxt, |
| ihevce_ed_blk_t *ps_ed_8x8, |
| UWORD8 *pu1_src, |
| WORD32 src_stride, |
| WORD32 *nbr_flags_ptr, |
| WORD32 lambda, |
| WORD32 *pi4_best_satd, |
| WORD32 i4_layer_id, |
| WORD32 i4_quality_preset, |
| WORD32 *pi4_best_sad_cost_8x8_l1_ipe, |
| WORD32 *pi4_best_sad_8x8_l1_ipe, |
| ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list, |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
| { |
| ihevce_ed_blk_t *ps_ed_4x4 = ps_ed_8x8; |
| UWORD8 *pu1_src_arr[4]; |
| WORD32 ai4_4x4_best_sad_cost[4]; |
| WORD32 nbr_flags_c, nbr_flags_r; |
| UWORD8 *pu1_src_4x4; |
| WORD32 i, j; |
| func_selector_t *ps_func_selector = ps_ed_ctxt->ps_func_selector; |
| ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution = |
| ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
| |
| /* linearize ref samples for ipe of 8x8 block */ |
| nbr_flags_c = nbr_flags_ptr[0]; |
| nbr_flags_r = nbr_flags_ptr[1]; |
| if(CHECK_TR_AVAILABLE(nbr_flags_r)) |
| { |
| SET_TR_AVAILABLE(nbr_flags_c); |
| } |
| else |
| { |
| SET_TR_UNAVAILABLE(nbr_flags_c); |
| } |
| |
| pf_intra_pred_luma_ref_substitution( |
| pu1_src - src_stride - 1, |
| pu1_src - src_stride, |
| pu1_src - 1, |
| src_stride, |
| 8, |
| nbr_flags_c, |
| &ps_ed_ctxt->au1_ref_8x8[0][0], |
| 0); |
| |
| for(i = 0; i < 2; i++) |
| { |
| pu1_src_4x4 = pu1_src + i * 4 * src_stride; |
| for(j = 0; j < 2; j++) |
| { |
| WORD32 i4_best_satd; |
| |
| pu1_src_arr[i * 2 + j] = pu1_src_4x4; |
| nbr_flags_c = nbr_flags_ptr[i * 8 + j]; |
| |
| /* linearize ref samples for ipe of 4x4 block */ |
| pf_intra_pred_luma_ref_substitution( |
| pu1_src_4x4 - src_stride - 1, |
| pu1_src_4x4 - src_stride, |
| pu1_src_4x4 - 1, |
| src_stride, |
| 4, |
| nbr_flags_c, |
| &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0], |
| 0); |
| |
| /* populates mode bits cost */ |
| ihevce_intra_populate_mode_bits_cost( |
| &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0], lambda); |
| |
| ihevce_ed_calc_4x4_blk( |
| ps_ed_4x4, |
| pu1_src_4x4, |
| src_stride, |
| &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0], |
| &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0], |
| &i4_best_satd, |
| i4_quality_preset, |
| &ai4_4x4_best_sad_cost[i * 2 + j], |
| ps_ipe_optimised_function_list); |
| |
| pu1_src_4x4 += 4; |
| ps_ed_4x4 += 1; |
| } |
| } |
| |
| /* 8x8 merge */ |
| { |
| UWORD8 pred[64]; |
| WORD32 merge_success; |
| WORD32 sad, satd, cost; |
| UWORD16 u2_sum_best_4x4_sad_cost = 0; |
| UWORD16 u2_sum_best_4x4_satd_cost = 0; |
| WORD32 i4_best_8x8_sad, i4_best_8x8_satd = 0; |
| UWORD16 u2_best_8x8_cost = (UWORD16)(-1); |
| UWORD8 u1_best_8x8_mode; |
| UWORD8 modes_to_eval[6]; |
| UWORD8 u1_cond_4x4_satd; |
| UWORD8 mode; |
| |
| /* init */ |
| ps_ed_4x4 = ps_ed_8x8; |
| u1_best_8x8_mode = mode = ps_ed_4x4[0].best_mode; |
| merge_success = |
| (((ps_ed_4x4[0].best_mode == ps_ed_4x4[1].best_mode) + |
| (ps_ed_4x4[0].best_mode == ps_ed_4x4[2].best_mode) + |
| (ps_ed_4x4[0].best_mode == ps_ed_4x4[3].best_mode)) == 3); |
| *pi4_best_satd = 0; |
| |
| for(i = 0; i < 4; i++) |
| { |
| u2_sum_best_4x4_sad_cost += ai4_4x4_best_sad_cost[i]; |
| modes_to_eval[i] = ps_ed_4x4[i].best_mode; |
| } |
| |
| u1_cond_4x4_satd = ((1 == i4_layer_id) || (!merge_success && i4_quality_preset < IHEVCE_QUALITY_P4)); |
| if(u1_cond_4x4_satd) |
| { |
| /* Get SATD for 4x4 blocks */ |
| for(i = 0; i < 4; i++) |
| { |
| mode = modes_to_eval[i]; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
| &ps_ed_ctxt->au1_ref_full_ctb[i][0], 0, &pred[0], 4, 4, mode); |
| |
| satd = ps_cmn_utils_optimised_function_list->pf_HAD_4x4_8bit( |
| pu1_src_arr[i], src_stride, &pred[0], 4, NULL, 0); |
| |
| (ps_ed_4x4 + i)->i4_4x4_satd = satd; |
| |
| u2_sum_best_4x4_satd_cost += |
| (satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]); |
| *pi4_best_satd += satd; |
| } |
| } |
| |
| if(!merge_success) |
| { |
| UWORD8 i1_start; /* no of modes to evaluate */ |
| UWORD8 ai1_modes[6]; |
| WORD32 i4_merge_success_stage2 = 0; |
| |
| /* Prepare 6 candidates for 8x8 block. Two are DC and planar */ |
| ai1_modes[4] = 0; |
| ai1_modes[5] = 1; |
| i1_start = 4; |
| |
| /* Assign along with removing duplicates rest 4 candidates. */ |
| for(i = 3; i >= 0; i--) |
| { |
| WORD8 i1_fresh_mode_flag = 1; |
| |
| mode = modes_to_eval[i]; |
| /* Check if duplicate already exists in ai1_modes */ |
| for(j = i1_start; j < 6; j++) |
| { |
| if(mode == ai1_modes[j]) |
| i1_fresh_mode_flag = 0; |
| } |
| if(i1_fresh_mode_flag) |
| { |
| i1_start--; |
| ai1_modes[i1_start] = mode; |
| } |
| } |
| |
| if(i4_quality_preset < IHEVCE_QUALITY_P4) |
| { |
| // 7.5 * lambda to incorporate transform flags |
| u2_sum_best_4x4_satd_cost += |
| (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1))); |
| |
| /* loop over all modes for calculating SATD */ |
| for(i = i1_start; i < 6; i++) |
| { |
| mode = ai1_modes[i]; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
| &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode); |
| |
| satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit( |
| pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0); |
| |
| cost = satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]; |
| |
| /* Update data corresponding to least 8x8 cost */ |
| if(cost <= u2_best_8x8_cost) |
| { |
| u2_best_8x8_cost = cost; |
| i4_best_8x8_satd = satd; |
| u1_best_8x8_mode = mode; |
| } |
| } |
| |
| /* 8x8 vs 4x4 decision based on SATD values */ |
| if((u2_best_8x8_cost <= u2_sum_best_4x4_satd_cost) || (u2_best_8x8_cost <= 300)) |
| { |
| i4_merge_success_stage2 = 1; |
| } |
| |
| /* Find the SAD based cost for 8x8 block for best mode */ |
| if(1 == i4_layer_id) |
| { |
| UWORD8 i4_best_8x8_mode = u1_best_8x8_mode; |
| WORD32 i4_best_8x8_sad_curr; |
| |
| g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]]( |
| &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, i4_best_8x8_mode); |
| |
| i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer( |
| pu1_src_arr[0], &pred[0], src_stride, 8); |
| |
| *pi4_best_sad_cost_8x8_l1_ipe = |
| i4_best_8x8_sad_curr + |
| ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode]; |
| *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr; |
| } |
| } |
| else /*If high_speed or extreme speed*/ |
| { |
| // 7.5 * lambda to incorporate transform flags |
| u2_sum_best_4x4_sad_cost += |
| (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1))); |
| |
| /*Loop over all modes for calculating SAD*/ |
| for(i = i1_start; i < 6; i++) |
| { |
| mode = ai1_modes[i]; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
| &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode); |
| |
| sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer( |
| pu1_src_arr[0], &pred[0], src_stride, 8); |
| |
| cost = sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]; |
| |
| /*Find the data correspoinding to least cost */ |
| if(cost <= u2_best_8x8_cost) |
| { |
| u2_best_8x8_cost = cost; |
| i4_best_8x8_sad = sad; |
| u1_best_8x8_mode = mode; |
| } |
| } |
| |
| /* 8x8 vs 4x4 decision based on SAD values */ |
| if((u2_best_8x8_cost <= u2_sum_best_4x4_sad_cost) || (u2_best_8x8_cost <= 300)) |
| { |
| i4_merge_success_stage2 = 1; |
| if(1 == i4_layer_id) |
| { |
| g_apf_lum_ip[g_i4_ip_funcs[u1_best_8x8_mode]]( |
| &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, u1_best_8x8_mode); |
| i4_best_8x8_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit( |
| pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0); |
| } |
| } |
| |
| if(1 == i4_layer_id) |
| { |
| *pi4_best_sad_cost_8x8_l1_ipe = u2_best_8x8_cost; |
| *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad; |
| } |
| } |
| if(i4_merge_success_stage2) |
| { |
| ps_ed_4x4->merge_success = 1; |
| ps_ed_4x4->best_merge_mode = u1_best_8x8_mode; |
| *pi4_best_satd = i4_best_8x8_satd; |
| } |
| } |
| else |
| { |
| ps_ed_4x4->merge_success = 1; |
| ps_ed_4x4->best_merge_mode = u1_best_8x8_mode; |
| |
| if(1 == i4_layer_id) |
| { |
| mode = u1_best_8x8_mode; |
| g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
| &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode); |
| |
| i4_best_8x8_sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer( |
| pu1_src_arr[0], &pred[0], src_stride, 8); |
| |
| *pi4_best_sad_cost_8x8_l1_ipe = |
| i4_best_8x8_sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]; |
| *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad; |
| |
| i4_best_8x8_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit( |
| pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0); |
| } |
| *pi4_best_satd = i4_best_8x8_satd; |
| } |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_ed_calc_ctb \endif |
| * |
| * \brief: performs L1/L2 8x8 and 4x4 intra mode analysis |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_ed_calc_ctb( |
| ihevce_ed_ctxt_t *ps_ed_ctxt, |
| ihevce_ed_blk_t *ps_ed_ctb, |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
| UWORD8 *pu1_src, |
| WORD32 src_stride, |
| WORD32 num_4x4_blks_x, |
| WORD32 num_4x4_blks_y, |
| WORD32 *nbr_flags, |
| WORD32 i4_layer_id, |
| ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list, |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
| { |
| ihevce_ed_blk_t *ps_ed_8x8; |
| UWORD8 *pu1_src_8x8; |
| WORD32 *nbr_flags_ptr; |
| WORD32 lambda = ps_ed_ctxt->lambda; |
| WORD32 i, j; |
| WORD32 z_scan_idx = 0; |
| WORD32 z_scan_act_idx = 0; |
| |
| if(i4_layer_id == 1) |
| { |
| WORD32 i4_i; |
| |
| for(i4_i = 0; i4_i < 64; i4_i++) |
| { |
| (ps_ed_ctb + i4_i)->i4_4x4_satd = -1; |
| } |
| |
| for(i4_i = 0; i4_i < 16; i4_i++) |
| { |
| ps_ed_ctb_l1->i4_sum_4x4_satd[i4_i] = -2; |
| ps_ed_ctb_l1->i4_min_4x4_satd[i4_i] = 0x7FFFFFFF; |
| ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] = -2; |
| ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] = -2; |
| } |
| |
| for(i4_i = 0; i4_i < 4; i4_i++) |
| { |
| ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] = -2; |
| ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] = -2; |
| ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] = -2; |
| } |
| ps_ed_ctb_l1->i4_32x32_satd[0][0] = -2; |
| ps_ed_ctb_l1->i4_32x32_satd[0][1] = -2; |
| ps_ed_ctb_l1->i4_32x32_satd[0][2] = -2; |
| ps_ed_ctb_l1->i4_32x32_satd[0][3] = -2; |
| |
| for(i4_i = 0; i4_i < 16; i4_i++) |
| { |
| ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i4_i] = -1; |
| ps_ed_ctb_l1->i4_sad_cost_me_for_ref[i4_i] = -1; |
| ps_ed_ctb_l1->i4_sad_me_for_ref[i4_i] = -1; |
| ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_i] = -1; |
| |
| ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_i] = -1; |
| |
| ps_ed_ctb_l1->i4_best_satd_8x8[i4_i] = -1; |
| ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i4_i] = -1; |
| ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_i] = -1; |
| } |
| } |
| |
| ASSERT((num_4x4_blks_x & 1) == 0); |
| ASSERT((num_4x4_blks_y & 1) == 0); |
| for(i = 0; i < num_4x4_blks_y / 2; i++) |
| { |
| pu1_src_8x8 = pu1_src + i * 2 * 4 * src_stride; |
| nbr_flags_ptr = &nbr_flags[0] + 2 * 8 * i; |
| |
| for(j = 0; j < num_4x4_blks_x / 2; j++) |
| { |
| WORD32 i4_best_satd; |
| WORD32 i4_best_sad_cost_8x8_l1_ipe; |
| WORD32 i4_best_sad_8x8_l1_ipe; |
| |
| z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2]; |
| z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j]; |
| ASSERT(z_scan_act_idx <= 15); |
| |
| ps_ed_8x8 = ps_ed_ctb + z_scan_idx; |
| ihevce_ed_calc_8x8_blk( |
| ps_ed_ctxt, |
| ps_ed_8x8, |
| pu1_src_8x8, |
| src_stride, |
| nbr_flags_ptr, |
| lambda, |
| &i4_best_satd, |
| i4_layer_id, |
| ps_ed_ctxt->i4_quality_preset, |
| &i4_best_sad_cost_8x8_l1_ipe, |
| &i4_best_sad_8x8_l1_ipe, |
| ps_ipe_optimised_function_list, |
| ps_cmn_utils_optimised_function_list); |
| ASSERT(i4_best_satd >= 0); |
| |
| if(i4_layer_id == 1) |
| { |
| ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] = |
| i4_best_sad_cost_8x8_l1_ipe; |
| ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_best_sad_8x8_l1_ipe; |
| ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd; |
| ps_ed_ctxt->i8_sum_best_satd += i4_best_satd; |
| ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd); |
| } |
| pu1_src_8x8 += 8; |
| nbr_flags_ptr += 2; |
| } |
| } |
| } |
| |
| float fast_log2(float val) |
| { |
| union { float val; int32_t x; } u = { val }; |
| float log_2 = (float)(((u.x >> 23) & 255) - 128); |
| |
| u.x &= ~(255 << 23); |
| u.x += 127 << 23; |
| log_2 += ((-1.0f / 3) * u.val + 2) * u.val - 2.0f / 3; |
| return log_2; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_cu_level_qp_mod \endif |
| * |
| * \brief: Performs CU level QP modulation |
| * |
| ***************************************************************************** |
| */ |
| WORD32 ihevce_cu_level_qp_mod( |
| WORD32 frm_qscale, |
| WORD32 cu_satd, |
| long double frm_avg_activity, |
| float f_mod_strength, |
| WORD32 *pi4_act_factor, |
| WORD32 *pi4_q_scale_mod, |
| rc_quant_t *rc_quant_ctxt) |
| { |
| WORD32 cu_qscale; |
| WORD32 cu_qp; |
| |
| *pi4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR); |
| if(cu_satd != -1 && (WORD32)frm_avg_activity != 0) |
| { |
| ULWORD64 sq_cur_satd = (cu_satd * cu_satd); |
| float log2_sq_cur_satd = fast_log2(1 + sq_cur_satd); |
| WORD32 qp_offset = f_mod_strength * (log2_sq_cur_satd - frm_avg_activity); |
| |
| ASSERT(USE_SQRT_AVG_OF_SATD_SQR); |
| qp_offset = CLIP3(qp_offset, MIN_QP_MOD_OFFSET, MAX_QP_MOD_OFFSET); |
| *pi4_act_factor *= gad_look_up_activity[qp_offset + ABS(MIN_QP_MOD_OFFSET)]; |
| ASSERT(*pi4_act_factor > 0); |
| cu_qscale = ((frm_qscale * (*pi4_act_factor)) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))); |
| cu_qscale >>= QP_LEVEL_MOD_ACT_FACTOR; |
| } |
| else |
| { |
| cu_qscale = frm_qscale; |
| } |
| cu_qscale = CLIP3(cu_qscale, rc_quant_ctxt->i2_min_qscale, rc_quant_ctxt->i2_max_qscale); |
| cu_qp = rc_quant_ctxt->pi4_qscale_to_qp[cu_qscale]; |
| cu_qp = CLIP3(cu_qp, rc_quant_ctxt->i2_min_qp, rc_quant_ctxt->i2_max_qp); |
| *pi4_q_scale_mod = cu_qscale; |
| |
| return (cu_qp); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_ed_frame_init \endif |
| * |
| * \brief: Initialize frame context for early decision |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_ed_frame_init(void *pv_ed_ctxt, WORD32 i4_layer_no) |
| { |
| ihevce_ed_ctxt_t *ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt; |
| |
| g_apf_lum_ip[IP_FUNC_MODE_0] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_1] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_2] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_3TO9] = |
| ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_10] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_11TO17] = |
| ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_18_34] = |
| ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_19TO25] = |
| ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_26] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr; |
| g_apf_lum_ip[IP_FUNC_MODE_27TO33] = |
| ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr; |
| |
| if(i4_layer_no == 1) |
| { |
| ps_ed_ctxt->i8_sum_best_satd = 0; |
| ps_ed_ctxt->i8_sum_sq_best_satd = 0; |
| } |
| } |
| |
| /** |
| ******************************************************************************** |
| * |
| * @brief downscales by 2 in horz and vertical direction, creates output of |
| * size wd/2 * ht/2 |
| * |
| * @param[in] pu1_src : source pointer |
| * @param[in] src_stride : source stride |
| * @param[out] pu1_dst : destination pointer. Starting of a row. |
| * @param[in] dst_stride : destination stride |
| * @param[in] wd : width |
| * @param[in] ht : height |
| * @param[in] pu1_wkg_mem : working memory (atleast of size CEIL16(wd) * ht)) |
| * @param[in] ht_offset : height offset of the block to be scaled |
| * @param[in] block_ht : height of the block to be scaled |
| * @param[in] wd_offset : width offset of the block to be scaled |
| * @param[in] block_wd : width of the block to be scaled |
| * |
| * @return void |
| * |
| * @remarks Assumption made block_ht should me multiple of 2. LANCZOS_SCALER |
| * |
| ******************************************************************************** |
| */ |
| void ihevce_scaling_filter_mxn( |
| UWORD8 *pu1_src, |
| WORD32 src_strd, |
| UWORD8 *pu1_scrtch, |
| WORD32 scrtch_strd, |
| UWORD8 *pu1_dst, |
| WORD32 dst_strd, |
| WORD32 ht, |
| WORD32 wd) |
| { |
| #define FILT_TAP_Q 8 |
| #define N_TAPS 7 |
| const WORD16 i4_ftaps[N_TAPS] = { -18, 0, 80, 132, 80, 0, -18 }; |
| WORD32 i, j; |
| WORD32 tmp; |
| UWORD8 *pu1_src_tmp = pu1_src - 3 * src_strd; |
| UWORD8 *pu1_scrtch_tmp = pu1_scrtch; |
| |
| /* horizontal filtering */ |
| for(i = -3; i < ht + 2; i++) |
| { |
| for(j = 0; j < wd; j += 2) |
| { |
| tmp = (i4_ftaps[3] * pu1_src_tmp[j] + |
| i4_ftaps[2] * (pu1_src_tmp[j - 1] + pu1_src_tmp[j + 1]) + |
| i4_ftaps[1] * (pu1_src_tmp[j + 2] + pu1_src_tmp[j - 2]) + |
| i4_ftaps[0] * (pu1_src_tmp[j + 3] + pu1_src_tmp[j - 3]) + |
| (1 << (FILT_TAP_Q - 1))) >> |
| FILT_TAP_Q; |
| pu1_scrtch_tmp[j >> 1] = CLIP_U8(tmp); |
| } |
| pu1_scrtch_tmp += scrtch_strd; |
| pu1_src_tmp += src_strd; |
| } |
| /* vertical filtering */ |
| pu1_scrtch_tmp = pu1_scrtch + 3 * scrtch_strd; |
| for(i = 0; i < ht; i += 2) |
| { |
| for(j = 0; j < (wd >> 1); j++) |
| { |
| tmp = |
| (i4_ftaps[3] * pu1_scrtch_tmp[j] + |
| i4_ftaps[2] * (pu1_scrtch_tmp[j + scrtch_strd] + pu1_scrtch_tmp[j - scrtch_strd]) + |
| i4_ftaps[1] * |
| (pu1_scrtch_tmp[j + 2 * scrtch_strd] + pu1_scrtch_tmp[j - 2 * scrtch_strd]) + |
| i4_ftaps[0] * |
| (pu1_scrtch_tmp[j + 3 * scrtch_strd] + pu1_scrtch_tmp[j - 3 * scrtch_strd]) + |
| (1 << (FILT_TAP_Q - 1))) >> |
| FILT_TAP_Q; |
| pu1_dst[j] = CLIP_U8(tmp); |
| } |
| pu1_dst += dst_strd; |
| pu1_scrtch_tmp += (scrtch_strd << 1); |
| } |
| } |
| |
| void ihevce_scale_by_2( |
| UWORD8 *pu1_src, |
| WORD32 src_strd, |
| UWORD8 *pu1_dst, |
| WORD32 dst_strd, |
| WORD32 wd, |
| WORD32 ht, |
| UWORD8 *pu1_wkg_mem, |
| WORD32 ht_offset, |
| WORD32 block_ht, |
| WORD32 wd_offset, |
| WORD32 block_wd, |
| FT_COPY_2D *pf_copy_2d, |
| FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn) |
| { |
| #define N_TAPS 7 |
| #define MAX_BLK_SZ (MAX_CTB_SIZE + ((N_TAPS >> 1) << 1)) |
| UWORD8 au1_cpy[MAX_BLK_SZ * MAX_BLK_SZ]; |
| UWORD32 cpy_strd = MAX_BLK_SZ; |
| UWORD8 *pu1_cpy = au1_cpy + cpy_strd * (N_TAPS >> 1) + (N_TAPS >> 1); |
| |
| UWORD8 *pu1_in, *pu1_out; |
| WORD32 in_strd, wkg_mem_strd; |
| |
| WORD32 row_start, row_end; |
| WORD32 col_start, col_end; |
| WORD32 i, fun_select; |
| WORD32 ht_tmp, wd_tmp; |
| FT_SCALING_FILTER_BY_2 *ihevce_scaling_filters[2]; |
| |
| assert((wd & 1) == 0); |
| assert((ht & 1) == 0); |
| assert(block_wd <= MAX_CTB_SIZE); |
| assert(block_ht <= MAX_CTB_SIZE); |
| |
| /* function pointers for filtering different dimensions */ |
| ihevce_scaling_filters[0] = ihevce_scaling_filter_mxn; |
| ihevce_scaling_filters[1] = pf_scaling_filter_mxn; |
| |
| /* handle boundary blks */ |
| col_start = (wd_offset < (N_TAPS >> 1)) ? 1 : 0; |
| row_start = (ht_offset < (N_TAPS >> 1)) ? 1 : 0; |
| col_end = ((wd_offset + block_wd) > (wd - (N_TAPS >> 1))) ? 1 : 0; |
| row_end = ((ht_offset + block_ht) > (ht - (N_TAPS >> 1))) ? 1 : 0; |
| if(col_end && (wd % block_wd != 0)) |
| { |
| block_wd = (wd % block_wd); |
| } |
| if(row_end && (ht % block_ht != 0)) |
| { |
| block_ht = (ht % block_ht); |
| } |
| |
| /* boundary blks needs to be padded, copy src to tmp buffer */ |
| if(col_start || col_end || row_end || row_start) |
| { |
| UWORD8 *pu1_src_tmp = pu1_src + wd_offset + ht_offset * src_strd; |
| |
| pu1_cpy -= (3 * (1 - col_start) + cpy_strd * 3 * (1 - row_start)); |
| pu1_src_tmp -= (3 * (1 - col_start) + src_strd * 3 * (1 - row_start)); |
| ht_tmp = block_ht + 3 * (1 - row_start) + 3 * (1 - row_end); |
| wd_tmp = block_wd + 3 * (1 - col_start) + 3 * (1 - col_end); |
| pf_copy_2d(pu1_cpy, cpy_strd, pu1_src_tmp, src_strd, wd_tmp, ht_tmp); |
| pu1_in = au1_cpy + cpy_strd * 3 + 3; |
| in_strd = cpy_strd; |
| } |
| else |
| { |
| pu1_in = pu1_src + wd_offset + ht_offset * src_strd; |
| in_strd = src_strd; |
| } |
| |
| /*top padding*/ |
| if(row_start) |
| { |
| UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3; |
| |
| pu1_cpy = au1_cpy + cpy_strd * (3 - 1); |
| memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
| pu1_cpy -= cpy_strd; |
| memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
| pu1_cpy -= cpy_strd; |
| memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
| } |
| |
| /*bottom padding*/ |
| if(row_end) |
| { |
| UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3 + (block_ht - 1) * cpy_strd; |
| |
| pu1_cpy = pu1_cpy_tmp + cpy_strd; |
| memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
| pu1_cpy += cpy_strd; |
| memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
| pu1_cpy += cpy_strd; |
| memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
| } |
| |
| /*left padding*/ |
| if(col_start) |
| { |
| UWORD8 *pu1_cpy_tmp = au1_cpy + 3; |
| |
| pu1_cpy = au1_cpy; |
| for(i = 0; i < block_ht + 6; i++) |
| { |
| pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0]; |
| pu1_cpy += cpy_strd; |
| pu1_cpy_tmp += cpy_strd; |
| } |
| } |
| |
| /*right padding*/ |
| if(col_end) |
| { |
| UWORD8 *pu1_cpy_tmp = au1_cpy + 3 + block_wd - 1; |
| |
| pu1_cpy = au1_cpy + 3 + block_wd; |
| for(i = 0; i < block_ht + 6; i++) |
| { |
| pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0]; |
| pu1_cpy += cpy_strd; |
| pu1_cpy_tmp += cpy_strd; |
| } |
| } |
| |
| wkg_mem_strd = block_wd >> 1; |
| pu1_out = pu1_dst + (wd_offset >> 1); |
| fun_select = (block_wd % 16 == 0); |
| ihevce_scaling_filters[fun_select]( |
| pu1_in, in_strd, pu1_wkg_mem, wkg_mem_strd, pu1_out, dst_strd, block_ht, block_wd); |
| |
| /* Left padding of 16 for 1st block of every row */ |
| if(wd_offset == 0) |
| { |
| UWORD8 u1_val; |
| WORD32 pad_wd = 16; |
| WORD32 pad_ht = block_ht >> 1; |
| UWORD8 *dst = pu1_dst; |
| |
| for(i = 0; i < pad_ht; i++) |
| { |
| u1_val = dst[0]; |
| memset(&dst[-pad_wd], u1_val, pad_wd); |
| dst += dst_strd; |
| } |
| } |
| |
| if(wd == wd_offset + block_wd) |
| { |
| /* Right padding of (16 + (CEIL16(wd/2))-wd/2) for last block of every row */ |
| /* Right padding is done only after processing of last block of that row is done*/ |
| UWORD8 u1_val; |
| WORD32 pad_wd = 16 + CEIL16((wd >> 1)) - (wd >> 1) + 4; |
| WORD32 pad_ht = block_ht >> 1; |
| UWORD8 *dst = pu1_dst + (wd >> 1) - 1; |
| |
| for(i = 0; i < pad_ht; i++) |
| { |
| u1_val = dst[0]; |
| memset(&dst[1], u1_val, pad_wd); |
| dst += dst_strd; |
| } |
| |
| if(ht_offset == 0) |
| { |
| /* Top padding of 16 is done for 1st row only after we reach end of that row */ |
| pad_wd = dst_strd; |
| pad_ht = 16; |
| dst = pu1_dst - 16; |
| for(i = 1; i <= pad_ht; i++) |
| { |
| memcpy(dst - (i * dst_strd), dst, pad_wd); |
| } |
| } |
| |
| /* Bottom padding of (16 + (CEIL16(ht/2)) - ht/2) is done only if we have |
| reached end of frame */ |
| if(ht - ht_offset - block_ht == 0) |
| { |
| pad_wd = dst_strd; |
| pad_ht = 16 + CEIL16((ht >> 1)) - (ht >> 1) + 4; |
| dst = pu1_dst + (((block_ht >> 1) - 1) * dst_strd) - 16; |
| for(i = 1; i <= pad_ht; i++) |
| memcpy(dst + (i * dst_strd), dst, pad_wd); |
| } |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_decomp_pre_intra_process_row \endif |
| * |
| * \brief |
| * Row level function which down scales a given row by 2 in horz and vertical |
| * direction creates output of size wd/2 * ht/2. When decomposition is done |
| * from L1 to L2 pre intra analysis is done on L1 |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_decomp_pre_intra_process_row( |
| UWORD8 *pu1_src, |
| WORD32 src_stride, |
| UWORD8 *pu1_dst_decomp, |
| WORD32 dst_stride, |
| WORD32 layer_wd, |
| WORD32 layer_ht, |
| UWORD8 *pu1_wkg_mem, |
| WORD32 ht_offset, |
| WORD32 block_ht, |
| WORD32 block_wd, |
| WORD32 num_col_blks, |
| WORD32 layer_no, |
| ihevce_ed_ctxt_t *ps_ed_ctxt, |
| ihevce_ed_blk_t *ps_ed_row, |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row, |
| WORD32 num_4x4_blks_ctb_y, |
| WORD32 num_4x4_blks_last_ctb_x, |
| WORD32 skip_decomp, |
| WORD32 skip_pre_intra, |
| WORD32 row_block_no, |
| ctb_analyse_t *ps_ctb_analyse, |
| ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list, |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
| { |
| WORD32 do_pre_intra_analysis = ((layer_no == 1) || (layer_no == 2)) && (!skip_pre_intra); |
| WORD32 col_block_no; |
| WORD32 i, j; |
| |
| if(!skip_decomp) |
| { |
| ctb_analyse_t *ps_ctb_analyse_curr = ps_ctb_analyse + row_block_no * num_col_blks; |
| |
| for(col_block_no = 0; col_block_no < num_col_blks; col_block_no++) |
| { |
| ihevce_scale_by_2( |
| pu1_src, |
| src_stride, |
| pu1_dst_decomp, |
| dst_stride, |
| layer_wd, |
| layer_ht, |
| pu1_wkg_mem, |
| ht_offset, |
| block_ht, |
| block_wd * col_block_no, |
| block_wd, |
| ps_cmn_utils_optimised_function_list->pf_copy_2d, |
| ps_ipe_optimised_function_list->pf_scaling_filter_mxn); |
| |
| /* Disable noise detection */ |
| memset( |
| ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy, |
| 0, |
| sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy)); |
| |
| ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0; |
| |
| ps_ctb_analyse_curr++; |
| } |
| } |
| |
| if(do_pre_intra_analysis) |
| { |
| ihevce_ed_blk_t *ps_ed_ctb = ps_ed_row; |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctb_l1_row; |
| WORD32 *nbr_flags_ptr = &ps_ed_ctxt->ai4_nbr_flags[0]; |
| UWORD8 *pu1_src_pre_intra = pu1_src + (ht_offset * src_stride); |
| WORD32 num_4x4_blks_in_ctb = block_wd >> 2; |
| WORD32 src_inc_pre_intra = num_4x4_blks_in_ctb * 4; |
| WORD32 inc_ctb = num_4x4_blks_in_ctb * num_4x4_blks_in_ctb; |
| |
| /* To analyse any given CTB we need to set the availability flags of the |
| * following neighbouring CTB: BL,L,TL,T,TR */ |
| /* copy the neighbor flags for a general ctb (ctb inside the frame); not any corners */ |
| memcpy( |
| ps_ed_ctxt->ai4_nbr_flags, |
| gau4_nbr_flags_8x8_4x4blks, |
| sizeof(gau4_nbr_flags_8x8_4x4blks)); |
| |
| /* set top flags unavailable for first ctb row */ |
| if(ht_offset == 0) |
| { |
| for(j = 0; j < num_4x4_blks_in_ctb; j++) |
| { |
| SET_T_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]); |
| SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]); |
| SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]); |
| } |
| } |
| |
| /* set bottom left flags as not available for last row */ |
| if(ht_offset + block_ht >= layer_ht) |
| { |
| for(j = 0; j < num_4x4_blks_in_ctb; j++) |
| { |
| SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(num_4x4_blks_ctb_y - 1) * 8 + j]); |
| } |
| } |
| |
| /* set left flags unavailable for 1st ctb col */ |
| for(j = 0; j < num_4x4_blks_ctb_y; j++) |
| { |
| SET_L_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
| SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
| SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
| } |
| |
| for(col_block_no = 0; col_block_no < num_col_blks; col_block_no++) |
| { |
| if(col_block_no == 1) |
| { |
| /* For the rest of the ctbs, set left flags available */ |
| for(j = 0; j < num_4x4_blks_ctb_y; j++) |
| { |
| SET_L_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
| } |
| for(j = 0; j < num_4x4_blks_ctb_y - 1; j++) |
| { |
| SET_BL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
| SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(j + 1) * 8]); |
| } |
| if(ht_offset != 0) |
| { |
| SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[0]); |
| } |
| } |
| |
| if(col_block_no == num_col_blks - 1) |
| { |
| /* set top right flags unavailable for last ctb col */ |
| for(i = 0; i < num_4x4_blks_ctb_y; i++) |
| { |
| SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[i * 8 + num_4x4_blks_last_ctb_x - 1]); |
| } |
| } |
| |
| /* Call intra analysis for the ctb */ |
| ihevce_ed_calc_ctb( |
| ps_ed_ctxt, |
| ps_ed_ctb, |
| ps_ed_ctb_l1, |
| pu1_src_pre_intra, |
| src_stride, |
| (col_block_no == num_col_blks - 1) ? num_4x4_blks_last_ctb_x : num_4x4_blks_in_ctb, |
| num_4x4_blks_ctb_y, |
| nbr_flags_ptr, |
| layer_no, |
| ps_ipe_optimised_function_list, |
| ps_cmn_utils_optimised_function_list); |
| pu1_src_pre_intra += src_inc_pre_intra; |
| ps_ed_ctb += inc_ctb; |
| ps_ed_ctb_l1 += 1; |
| } |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_decomp_pre_intra_process \endif |
| * |
| * \brief |
| * Frame level function to decompose given layer L0 into coarser layers and |
| * perform intra analysis on layers below L0 |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_decomp_pre_intra_process( |
| void *pv_ctxt, |
| ihevce_lap_output_params_t *ps_lap_out_prms, |
| frm_ctb_ctxt_t *ps_frm_ctb_prms, |
| void *pv_multi_thrd_ctxt, |
| WORD32 thrd_id, |
| WORD32 i4_ping_pong) |
| { |
| ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
| ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thrd_id]; |
| multi_thrd_ctxt_t *ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
| WORD32 i4_num_layers = ps_ctxt->i4_num_layers; |
| UWORD8 *pu1_wkg_mem = ps_ctxt->au1_wkg_mem; |
| ihevce_ed_ctxt_t *ps_ed_ctxt = ps_ctxt->ps_ed_ctxt; |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1; |
| ihevce_ed_blk_t *ps_ed; |
| WORD32 i4_layer_no; |
| WORD32 end_of_layer; |
| UWORD8 *pu1_src, *pu1_dst; |
| WORD32 src_stride, dst_stride; |
| WORD32 i4_layer_wd, i4_layer_ht; |
| WORD32 ht_offset, block_ht, row_block_no, num_row_blocks; |
| WORD32 block_wd, num_col_blks; |
| WORD32 skip_decomp, skip_pre_intra; |
| WORD32 inc_ctb; |
| |
| ASSERT(i4_num_layers >= 3); |
| ps_ctxt->as_layers[0].pu1_inp = (UWORD8 *)ps_lap_out_prms->s_input_buf.pv_y_buf; |
| ps_ctxt->as_layers[0].i4_inp_stride = ps_lap_out_prms->s_input_buf.i4_y_strd; |
| ps_ctxt->as_layers[0].i4_actual_wd = ps_lap_out_prms->s_input_buf.i4_y_wd; |
| ps_ctxt->as_layers[0].i4_actual_ht = ps_lap_out_prms->s_input_buf.i4_y_ht; |
| |
| /* This loop does decomp & intra by picking jobs from job queue */ |
| for(i4_layer_no = 0; i4_layer_no < i4_num_layers; i4_layer_no++) |
| { |
| WORD32 idx = 0; |
| |
| src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride; |
| pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp; |
| i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd; |
| i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht; |
| pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp; |
| dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride; |
| block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd; |
| block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht; |
| num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks; |
| num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks; |
| inc_ctb = (block_wd >> 2) * (block_wd >> 2); |
| end_of_layer = 0; |
| skip_pre_intra = 1; |
| skip_decomp = 0; |
| if(i4_layer_no >= (ps_ctxt->i4_num_layers - 1)) |
| { |
| skip_decomp = 1; |
| } |
| |
| /* ------------ Loop over all the CTB rows & perform Decomp --------------- */ |
| while(0 == end_of_layer) |
| { |
| job_queue_t *ps_pre_enc_job; |
| WORD32 num_4x4_blks_ctb_y = 0, num_4x4_blks_last_ctb_x = 0; |
| |
| /* Get the current row from the job queue */ |
| ps_pre_enc_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( |
| pv_multi_thrd_ctxt, (DECOMP_JOB_LYR0 + i4_layer_no), 1, i4_ping_pong); |
| |
| /* If all rows are done, set the end of layer flag to 1, */ |
| if(NULL == ps_pre_enc_job) |
| { |
| end_of_layer = 1; |
| } |
| else |
| { |
| /* Obtain the current row's details from the job */ |
| row_block_no = ps_pre_enc_job->s_job_info.s_decomp_job_info.i4_vert_unit_row_no; |
| ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = row_block_no; |
| ht_offset = row_block_no * block_ht; |
| |
| if(row_block_no < (num_row_blocks)) |
| { |
| pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp + |
| ((block_ht >> 1) * dst_stride * row_block_no); |
| |
| /* call the row level processing function */ |
| ihevce_decomp_pre_intra_process_row( |
| pu1_src, |
| src_stride, |
| pu1_dst, |
| dst_stride, |
| i4_layer_wd, |
| i4_layer_ht, |
| pu1_wkg_mem, |
| ht_offset, |
| block_ht, |
| block_wd, |
| num_col_blks, |
| i4_layer_no, |
| ps_ed_ctxt, |
| ps_ed, |
| ps_ed_ctb_l1, |
| num_4x4_blks_ctb_y, |
| num_4x4_blks_last_ctb_x, |
| skip_decomp, |
| skip_pre_intra, |
| row_block_no, |
| ps_ctxt->ps_ctb_analyse, |
| &ps_ctxt->s_ipe_optimised_function_list, |
| &ps_ctxt->s_cmn_opt_func); |
| } |
| idx++; |
| /* set the output dependency */ |
| ihevce_pre_enc_grp_job_set_out_dep( |
| pv_multi_thrd_ctxt, ps_pre_enc_job, i4_ping_pong); |
| } |
| } |
| ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = idx; |
| |
| /* ------------ For the same rows perform preintra if required --------------- */ |
| ihevce_ed_frame_init(ps_ed_ctxt, i4_layer_no); |
| |
| if((1 == i4_layer_no) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset)) |
| { |
| WORD32 vert_ctr, ctb_ctr, i; |
| WORD32 ctb_ctr_blks = ps_ctxt->as_layers[1].i4_num_col_blks; |
| WORD32 vert_ctr_blks = ps_ctxt->as_layers[1].i4_num_row_blks; |
| |
| if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && |
| (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)) |
| { |
| for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++) |
| { |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 = |
| ps_ctxt->ps_ed_ctb_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
| |
| for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++) |
| { |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr; |
| |
| for(i = 0; i < 16; i++) |
| { |
| ps_ed_ctb_curr_l1->i4_best_sad_cost_8x8_l1_ipe[i] = 0x7fffffff; |
| ps_ed_ctb_curr_l1->i4_best_sad_8x8_l1_ipe[i] = 0x7fffffff; |
| } |
| } |
| } |
| } |
| } |
| |
| #if DISABLE_L2_IPE_IN_PB_L1_IN_B |
| if(((2 == i4_layer_no) && (ps_lap_out_prms->i4_pic_type == IV_I_FRAME || |
| ps_lap_out_prms->i4_pic_type == IV_IDR_FRAME)) || |
| ((1 == i4_layer_no) && |
| (ps_lap_out_prms->i4_temporal_lyr_id <= TEMPORAL_LAYER_DISABLE)) || |
| ((IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset) && (0 != i4_layer_no))) |
| #else |
| if((0 != i4_layer_no) && |
| (1 != ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
| (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))) |
| #endif |
| { |
| WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed; |
| |
| ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no]; |
| if(0 == i4_layer_no) |
| { |
| ps_ed_ctxt->ps_ed_pic = NULL; |
| ps_ed_ctxt->ps_ed = NULL; |
| ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL; |
| ps_ed_ctxt->ps_ed_ctb_l1 = NULL; |
| } |
| else if(1 == i4_layer_no) |
| { |
| ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf; |
| ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf; |
| ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1; |
| ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1; |
| } |
| else if(2 == i4_layer_no) |
| { |
| ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf; |
| ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf; |
| ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL; |
| ps_ed_ctxt->ps_ed_ctb_l1 = NULL; |
| } |
| |
| skip_decomp = 1; |
| skip_pre_intra = 0; |
| |
| for(idx = 0; idx < i4_num_rows; idx++) |
| { |
| WORD32 num_4x4_blks_ctb_y = 0, num_4x4_blks_last_ctb_x = 0; |
| |
| /* Obtain the current row's details from the job */ |
| row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx]; |
| ht_offset = row_block_no * block_ht; |
| |
| if(row_block_no < (num_row_blocks)) |
| { |
| pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp + |
| ((block_ht >> 1) * dst_stride * row_block_no); |
| |
| if(i4_layer_no == 1 || i4_layer_no == 2) |
| { |
| ps_ed = ps_ed_ctxt->ps_ed + (row_block_no * inc_ctb * (num_col_blks)); |
| ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1 + (row_block_no * num_col_blks); |
| ps_ed_ctxt->i4_quality_preset = ps_ctxt->i4_quality_preset; |
| num_4x4_blks_last_ctb_x = block_wd >> 2; |
| num_4x4_blks_ctb_y = block_ht >> 2; |
| if(row_block_no == num_row_blocks - 1) |
| { |
| if(i4_layer_ht % block_ht) |
| { |
| num_4x4_blks_ctb_y = ((i4_layer_ht % block_ht) + 3) >> 2; |
| } |
| } |
| if(i4_layer_wd % block_wd) |
| { |
| num_4x4_blks_last_ctb_x = ((i4_layer_wd % block_wd) + 3) >> 2; |
| } |
| } |
| |
| /* call the row level processing function */ |
| ihevce_decomp_pre_intra_process_row( |
| pu1_src, |
| src_stride, |
| pu1_dst, |
| dst_stride, |
| i4_layer_wd, |
| i4_layer_ht, |
| pu1_wkg_mem, |
| ht_offset, |
| block_ht, |
| block_wd, |
| num_col_blks, |
| i4_layer_no, |
| ps_ed_ctxt, |
| ps_ed, |
| ps_ed_ctb_l1, |
| num_4x4_blks_ctb_y, |
| num_4x4_blks_last_ctb_x, |
| skip_decomp, |
| skip_pre_intra, |
| row_block_no, |
| NULL, |
| &ps_ctxt->s_ipe_optimised_function_list, |
| &ps_ctxt->s_cmn_opt_func); |
| } |
| |
| if(1 == i4_layer_no) |
| { |
| ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1; |
| } |
| } |
| for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++) |
| { |
| ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1; |
| } |
| ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0; |
| } |
| |
| #if DISABLE_L2_IPE_IN_PB_L1_IN_B |
| if((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
| (((i4_layer_no == 2) && (ps_lap_out_prms->i4_pic_type == ISLICE)) || |
| ((i4_layer_no == 1) && (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))) |
| { |
| WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed; |
| if(1 == i4_layer_no) |
| { |
| for(idx = 0; idx < i4_num_rows; idx++) |
| { |
| row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx]; |
| |
| { |
| ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1; |
| } |
| } |
| } |
| for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++) |
| { |
| ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1; |
| } |
| ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0; |
| } |
| #else |
| if((i4_layer_no != 0) && ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
| (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))) |
| { |
| WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed; |
| for(idx = 0; idx < i4_num_rows; idx++) |
| { |
| row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx]; |
| if(1 == i4_layer_no) |
| { |
| ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1; |
| } |
| } |
| for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++) |
| { |
| ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1; |
| } |
| ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0; |
| } |
| #endif |
| } |
| } |
| |
| /*! |
| ************************************************************************ |
| * \brief |
| * return number of records used by decomp pre intra |
| * |
| ************************************************************************ |
| */ |
| WORD32 ihevce_decomp_pre_intra_get_num_mem_recs(void) |
| { |
| return (NUM_DECOMP_PRE_INTRA_MEM_RECS); |
| } |
| |
| /*! |
| ************************************************************************ |
| * @brief |
| * return each record attributes of decomp pre intra |
| ************************************************************************ |
| */ |
| WORD32 ihevce_decomp_pre_intra_get_mem_recs( |
| iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space) |
| { |
| /* memories should be requested assuming worst case requirememnts */ |
| |
| /* Module context structure */ |
| ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_size = sizeof(ihevce_decomp_pre_intra_master_ctxt_t); |
| ps_mem_tab[DECOMP_PRE_INTRA_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
| ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_alignment = 8; |
| |
| /* Thread context structure */ |
| ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_size = |
| i4_num_proc_thrds * sizeof(ihevce_decomp_pre_intra_ctxt_t); |
| ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
| ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_alignment = 8; |
| |
| /* early decision context structure */ |
| ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ed_ctxt_t); |
| ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
| ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_alignment = 8; |
| |
| return (NUM_DECOMP_PRE_INTRA_MEM_RECS); |
| } |
| |
| /*! |
| ************************************************************************ |
| * @brief |
| * Init decomp pre intra context |
| ************************************************************************ |
| */ |
| void *ihevce_decomp_pre_intra_init( |
| iv_mem_rec_t *ps_mem_tab, |
| ihevce_static_cfg_params_t *ps_init_prms, |
| WORD32 i4_num_proc_thrds, |
| func_selector_t *ps_func_selector, |
| WORD32 i4_resolution_id, |
| UWORD8 u1_is_popcnt_available) |
| { |
| ihevce_decomp_pre_intra_master_ctxt_t *ps_mstr_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_CTXT].pv_base; |
| ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].pv_base; |
| ihevce_ed_ctxt_t *ps_ed_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].pv_base; |
| ihevce_tgt_params_t *ps_tgt_prms = &ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]; |
| WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size; |
| WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; |
| WORD32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; |
| WORD32 n_tot_layers; |
| WORD32 i, j, k; |
| |
| /* Get the height and width of each layer */ |
| *a_wd = ps_tgt_prms->i4_width + SET_CTB_ALIGN(ps_tgt_prms->i4_width, min_cu_size); |
| *a_ht = ps_tgt_prms->i4_height + SET_CTB_ALIGN(ps_tgt_prms->i4_height, min_cu_size); |
| n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht); |
| ps_mstr_ctxt->i4_num_proc_thrds = i4_num_proc_thrds; |
| for(i = 0; i < ps_mstr_ctxt->i4_num_proc_thrds; i++) |
| { |
| ps_mstr_ctxt->aps_decomp_pre_intra_thrd_ctxt[i] = ps_ctxt; |
| ps_ctxt->i4_num_layers = n_tot_layers; |
| ps_ctxt->ps_ed_ctxt = ps_ed_ctxt; |
| for(j = 0; j < n_tot_layers; j++) |
| { |
| /** If CTB size= 64, decomp_blk_wd = 64 for L0, 32 for L1 , 16 for L2, 8 for L3 */ |
| WORD32 max_ctb_size = 1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size; |
| WORD32 decomp_blk_wd = max_ctb_size >> j; |
| WORD32 decomp_blk_ht = max_ctb_size >> j; |
| |
| ps_ctxt->as_layers[j].i4_actual_wd = a_wd[j]; |
| ps_ctxt->as_layers[j].i4_actual_ht = a_ht[j]; |
| if(0 == j) |
| { |
| ps_ctxt->as_layers[j].i4_padded_ht = a_ht[j]; |
| ps_ctxt->as_layers[j].i4_padded_wd = a_wd[j]; |
| } |
| else |
| { |
| ps_ctxt->as_layers[j].i4_padded_ht = a_ht[j] + 32 + 4; |
| ps_ctxt->as_layers[j].i4_padded_wd = a_wd[j] + 32 + 4; |
| } |
| ps_ctxt->as_layers[j].pu1_inp = NULL; |
| ps_ctxt->as_layers[j].i4_inp_stride = 0; |
| ps_ctxt->as_layers[j].i4_decomp_blk_ht = decomp_blk_ht; |
| ps_ctxt->as_layers[j].i4_decomp_blk_wd = decomp_blk_wd; |
| ps_ctxt->as_layers[j].i4_num_row_blks = ((a_ht[j] + (decomp_blk_ht - 1)) / decomp_blk_ht); |
| ps_ctxt->as_layers[j].i4_num_col_blks = ((a_wd[j] + (decomp_blk_wd - 1)) / decomp_blk_wd); |
| for(k = 0; k < MAX_NUM_CTB_ROWS_FRM; k++) |
| { |
| ps_ctxt->as_layers[j].ai4_curr_row_no[k] = -1; |
| } |
| ps_ctxt->as_layers[j].i4_num_rows_processed = 0; |
| } |
| ps_ctxt->i4_quality_preset = ps_tgt_prms->i4_quality_preset; |
| if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7) |
| { |
| ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6; |
| } |
| if(ps_init_prms->s_coding_tools_prms.i4_vqet & |
| (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) |
| { |
| if(ps_init_prms->s_coding_tools_prms.i4_vqet & |
| (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) |
| { |
| ps_ctxt->i4_enable_noise_detection = 1; |
| } |
| else |
| { |
| ps_ctxt->i4_enable_noise_detection = 0; |
| } |
| } |
| else |
| { |
| ps_ctxt->i4_enable_noise_detection = 0; |
| } |
| ihevce_cmn_utils_instr_set_router( |
| &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); |
| ihevce_ipe_instr_set_router( |
| &ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type); |
| |
| ps_ed_ctxt->ps_func_selector = ps_func_selector; |
| |
| ps_ctxt++; |
| ps_ed_ctxt++; |
| } |
| /* return the handle to caller */ |
| return ((void *)ps_mstr_ctxt); |
| } |
| |
| /*! |
| ************************************************************************ |
| * @brief |
| * Init decomp pre intra layer buffers |
| ************************************************************************ |
| */ |
| void ihevce_decomp_pre_intra_frame_init( |
| void *pv_ctxt, |
| UWORD8 **ppu1_decomp_lyr_bufs, |
| WORD32 *pi4_lyr_buf_stride, |
| ihevce_ed_blk_t *ps_layer1_buf, |
| ihevce_ed_blk_t *ps_layer2_buf, |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
| WORD32 i4_ol_sad_lambda_qf, |
| ctb_analyse_t *ps_ctb_analyse) |
| { |
| ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
| ihevce_decomp_pre_intra_ctxt_t *ps_ctxt; |
| WORD32 i, j; |
| |
| for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
| { |
| ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i]; |
| |
| /* L0 layer (actual input) is registered in process call */ |
| for(j = 1; j < ps_ctxt->i4_num_layers; j++) |
| { |
| ps_ctxt->as_layers[j].i4_inp_stride = pi4_lyr_buf_stride[j - 1]; |
| ps_ctxt->as_layers[j].pu1_inp = ppu1_decomp_lyr_bufs[j - 1]; |
| |
| /* Populating the buffer pointers for layer1 and layer2 buffers to store the |
| structure for each 4x4 block after pre intra analysis on their respective layers */ |
| if(j == 1) |
| { |
| WORD32 sad_lambda_l1 = (3 * i4_ol_sad_lambda_qf >> 2); |
| WORD32 temp = 1 << LAMBDA_Q_SHIFT; |
| WORD32 lambda = ((temp) > sad_lambda_l1) ? temp : sad_lambda_l1; |
| |
| ps_ctxt->ps_layer1_buf = ps_layer1_buf; |
| ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1; |
| ps_ctxt->ai4_lambda[j] = lambda; |
| } |
| else if(j == 2) |
| { |
| WORD32 sad_lambda_l2 = i4_ol_sad_lambda_qf >> 1; |
| WORD32 temp = 1 << LAMBDA_Q_SHIFT; |
| WORD32 lambda = ((temp) > sad_lambda_l2) ? temp : sad_lambda_l2; |
| |
| ps_ctxt->ps_layer2_buf = ps_layer2_buf; |
| ps_ctxt->ai4_lambda[j] = lambda; |
| } |
| else |
| { |
| ps_ctxt->ai4_lambda[j] = -1; |
| } |
| } |
| |
| /* make the ps_ctb_analyse refernce as a part of the private context */ |
| ps_ctxt->ps_ctb_analyse = ps_ctb_analyse; |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief Merge Sort function. |
| * |
| * @par Description: |
| * This function sorts the data in the input array in ascending |
| * order using merge sort algorithm. Intermediate data obtained in |
| * merge sort are stored in output 2-D array. |
| * |
| * @param[in] |
| * pi4_input_val : Input 1-D array |
| * aai4_output_val: Output 2-D array containing elements sorted in sets of |
| * 4,16,64 etc. |
| * i4_length : length of the array |
| * i4_ip_sort_level: Input sort level. Specifies the level upto which array is sorted. |
| * It should be 1 if the array is unsorted. Should be 4 if array is sorted |
| * in sets of 4. |
| * i4_op_sort_level: Output sort level. Specify the level upto which sorting is required. |
| * If it is given as length of array it sorts for whole array. |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_merge_sort( |
| WORD32 *pi4_input_val, |
| WORD32 aai4_output_val[][64], |
| WORD32 i4_length, |
| WORD32 i4_ip_sort_level, |
| WORD32 i4_op_sort_level) |
| { |
| WORD32 i, j, k; |
| WORD32 count, level; |
| WORD32 temp[64]; |
| WORD32 *pi4_temp_buf_cpy; |
| WORD32 *pi4_temp = &temp[0]; |
| WORD32 calc_level; |
| |
| pi4_temp_buf_cpy = pi4_temp; |
| |
| GETRANGE(calc_level, i4_op_sort_level / i4_ip_sort_level); |
| |
| calc_level = calc_level - 1; |
| |
| /*** This function is written under the assumption that we need only intermediate values of |
| sort in the range of 4,16,64 etc. ***/ |
| ASSERT((calc_level % 2) == 0); |
| |
| /** One iteration of this for loop does 1 sets of sort and produces one intermediate value in 2 iterations **/ |
| for(level = 0; level < calc_level; level++) |
| { |
| /** Merges adjacent sets of elements based on current sort level **/ |
| for(count = 0; count < i4_length; (count = count + (i4_ip_sort_level * 2))) |
| { |
| i = 0; |
| j = 0; |
| if(pi4_input_val[i4_ip_sort_level - 1] < pi4_input_val[i4_ip_sort_level]) |
| { |
| /*** Condition for early exit ***/ |
| memcpy(&pi4_temp[0], pi4_input_val, sizeof(WORD32) * i4_ip_sort_level * 2); |
| } |
| else |
| { |
| for(k = 0; k < (i4_ip_sort_level * 2); k++) |
| { |
| if((i < i4_ip_sort_level) && (j < i4_ip_sort_level)) |
| { |
| if(pi4_input_val[i] > pi4_input_val[j + i4_ip_sort_level]) |
| { |
| /** copy to output array **/ |
| pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level]; |
| j++; |
| } |
| else |
| { |
| /** copy to output array **/ |
| pi4_temp[k] = pi4_input_val[i]; |
| i++; |
| } |
| } |
| else if(i == i4_ip_sort_level) |
| { |
| /** copy the remaining data to output array **/ |
| pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level]; |
| j++; |
| } |
| else |
| { |
| /** copy the remaining data to output array **/ |
| pi4_temp[k] = pi4_input_val[i]; |
| i++; |
| } |
| } |
| } |
| pi4_input_val += (i4_ip_sort_level * 2); |
| pi4_temp += (i4_ip_sort_level * 2); |
| } |
| pi4_input_val = pi4_temp - i4_length; |
| |
| if(level % 2) |
| { |
| /** Assign a temp address for storing next sort level output as we will not need this data as output **/ |
| pi4_temp = pi4_temp_buf_cpy; |
| } |
| else |
| { |
| /** Assign address for storing the intermediate data into output 2-D array **/ |
| pi4_temp = aai4_output_val[level / 2]; |
| } |
| i4_ip_sort_level *= 2; |
| } |
| } |
| |
| /*! |
| ************************************************************************ |
| * @brief |
| * Calculate the average activities at 16*16 (8*8 in L1) and 32*32 |
| * (8*8 in L2) block sizes. As this function accumulates activities |
| * across blocks of a frame, this needs to be called by only one thread |
| * and only after ensuring the processing of entire frame is done |
| ************************************************************************ |
| */ |
| void ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit( |
| void *pv_pre_intra_ctxt, |
| pre_enc_me_ctxt_t *ps_curr_out, |
| frm_ctb_ctxt_t *ps_frm_ctb_prms) |
| { |
| ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_pre_intra_ctxt; |
| ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0]; |
| |
| ULWORD64 u8_frame_8x8_sum_act_sqr = 0; |
| LWORD64 ai8_frame_8x8_sum_act_sqr[2] = { 0, 0 }; |
| WORD32 ai4_frame_8x8_sum_act[2] = { 0, 0 }; |
| WORD32 ai4_frame_8x8_sum_blks[2] = { 0, 0 }; |
| |
| LWORD64 ai8_frame_16x16_sum_act_sqr[3] = { 0, 0, 0 }; |
| WORD32 ai4_frame_16x16_sum_act[3] = { 0, 0, 0 }; |
| WORD32 ai4_frame_16x16_sum_blks[3] = { 0, 0, 0 }; |
| |
| LWORD64 ai8_frame_32x32_sum_act_sqr[3] = { 0, 0, 0 }; |
| WORD32 ai4_frame_32x32_sum_act[3] = { 0, 0, 0 }; |
| WORD32 ai4_frame_32x32_sum_blks[3] = { 0, 0, 0 }; |
| |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_pic_l1 = ps_curr_out->ps_ed_ctb_l1; |
| ihevce_ed_blk_t *ps_ed_blk_l1 = ps_curr_out->ps_layer1_buf; |
| WORD32 ctb_wd = ps_ctxt->as_layers[1].i4_decomp_blk_wd; |
| WORD32 h_ctb_cnt = ps_ctxt->as_layers[1].i4_num_col_blks; |
| WORD32 v_ctb_cnt = ps_ctxt->as_layers[1].i4_num_row_blks; |
| WORD32 sub_blk_cnt = ((ctb_wd >> 2) * (ctb_wd >> 2)); |
| WORD32 i4_avg_noise_satd; |
| WORD32 ctb_ctr, vert_ctr; |
| WORD32 i, j, k; |
| |
| { |
| /* Calculate min noise threshold */ |
| /* Min noise threshold is calculated by taking average of lowest 1% satd val in |
| * the complete 4x4 frame satds */ |
| #define MAX_SATD 64 |
| #define SATD_NOISE_FLOOR_THRESHOLD 16 |
| #define MIN_BLKS 2 |
| WORD32 i4_layer_wd = ps_ctxt->as_layers[1].i4_actual_wd; |
| WORD32 i4_layer_ht = ps_ctxt->as_layers[1].i4_actual_ht; |
| WORD32 i4_min_blk = ((MIN_BLKS * (i4_layer_wd >> 1) * (i4_layer_ht >> 1)) / 100); |
| WORD32 i4_total_blks = 0; |
| WORD32 satd_hist[MAX_SATD]; |
| LWORD64 i8_acc_satd = 0; |
| |
| memset(satd_hist, 0, sizeof(satd_hist)); |
| for(i = 0; i < sub_blk_cnt * h_ctb_cnt * v_ctb_cnt; i++) |
| { |
| if(ps_ed_blk_l1[i].i4_4x4_satd >= 0 && ps_ed_blk_l1[i].i4_4x4_satd < MAX_SATD) |
| { |
| satd_hist[ps_ed_blk_l1[i].i4_4x4_satd]++; |
| } |
| } |
| for(i = 0; i < MAX_SATD && i4_total_blks <= i4_min_blk; i++) |
| { |
| i4_total_blks += satd_hist[i]; |
| i8_acc_satd += (i * satd_hist[i]); |
| } |
| if(i4_total_blks < i4_min_blk) |
| { |
| i4_avg_noise_satd = SATD_NOISE_FLOOR_THRESHOLD; |
| } |
| else |
| { |
| i4_avg_noise_satd = (WORD32)(i8_acc_satd + (i4_total_blks >> 1)) / i4_total_blks; |
| } |
| ps_curr_out->i4_avg_noise_thrshld_4x4 = i4_avg_noise_satd; |
| } |
| |
| for(vert_ctr = 0; vert_ctr < v_ctb_cnt; vert_ctr++) |
| { |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 = |
| ps_ed_ctb_pic_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
| ihevce_ed_blk_t *ps_ed = ps_ed_blk_l1 + (vert_ctr * sub_blk_cnt * h_ctb_cnt); |
| |
| for(ctb_ctr = 0; ctb_ctr < h_ctb_cnt; ctb_ctr++, ps_ed += sub_blk_cnt) |
| { |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr; |
| WORD8 b8_satd_eval[4]; |
| WORD32 ai4_satd_4x4[64]; |
| WORD32 ai4_satd_8x8[16]; // derived from accumulating 4x4 satds |
| WORD32 ai4_satd_16x16[4] = { 0 }; // derived from accumulating 8x8 satds |
| WORD32 i4_satd_32x32 = 0; // derived from accumulating 8x8 satds |
| /* This 2-D array will contain 4x4 satds sorted in ascending order in sets |
| * of 4, 16, 64 For example : '5 10 2 7 6 12 3 1' array input will return |
| * '2 5 7 10 1 3 6 12' if sorted in sets of 4 */ |
| WORD32 aai4_sort_4_16_64_satd[3][64]; |
| /* This 2-D array will contain 8x8 satds sorted in ascending order in sets of |
| * 4, 16***/ |
| WORD32 aai4_sort_4_16_satd[2][64]; |
| |
| memset(b8_satd_eval, 1, sizeof(b8_satd_eval)); |
| for(i = 0; i < 4; i++) |
| { |
| ihevce_ed_blk_t *ps_ed_b32 = &ps_ed[i * 16]; |
| |
| for(j = 0; j < 4; j++) |
| { |
| ihevce_ed_blk_t *ps_ed_b16 = &ps_ed_b32[j * 4]; |
| WORD32 satd_sum = 0; |
| WORD32 blk_cnt = 0; |
| |
| for(k = 0; k < 4; k++) |
| { |
| ihevce_ed_blk_t *ps_ed_b4 = &ps_ed_b16[k]; |
| |
| if(-1 != ps_ed_b4->i4_4x4_satd) |
| { |
| #define SUB_NOISE_THRSHLD 0 |
| #if SUB_NOISE_THRSHLD |
| ps_ed_b4->i4_4x4_satd = ps_ed_b4->i4_4x4_satd - i4_avg_noise_satd; |
| if(ps_ed_b4->i4_4x4_satd < 0) |
| { |
| ps_ed_b4->i4_4x4_satd = 0; |
| } |
| #else |
| if(ps_ed_b4->i4_4x4_satd < i4_avg_noise_satd) |
| { |
| ps_ed_b4->i4_4x4_satd = i4_avg_noise_satd; |
| } |
| #endif |
| blk_cnt++; |
| satd_sum += ps_ed_b4->i4_4x4_satd; |
| } |
| ai4_satd_4x4[i * 16 + j * 4 + k] = ps_ed_b4->i4_4x4_satd; |
| } |
| ASSERT(blk_cnt == 0 || blk_cnt == 4); |
| if(blk_cnt == 0) |
| { |
| satd_sum = -1; |
| } |
| ai4_satd_8x8[i * 4 + j] = satd_sum; |
| ai4_satd_16x16[i] += satd_sum; |
| i4_satd_32x32 += satd_sum; |
| ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j] = satd_sum; |
| } |
| } |
| |
| { |
| /* This function will sort 64 elements in array ai4_satd_4x4 in ascending order |
| * to 3 arrays in sets of 4, 16, 64 into the 2-D array aai4_min_4_16_64_satd */ |
| WORD32 array_length = sizeof(ai4_satd_4x4) / sizeof(WORD32); |
| ihevce_merge_sort( |
| &ai4_satd_4x4[0], aai4_sort_4_16_64_satd, array_length, 1, 64); |
| |
| /* This function will sort 64 elements in array ai4_satd_8x8 in ascending order |
| * to 2 arrays in sets of 4, 16 into the 2-D array aai4_sum_4_16_satd_ctb */ |
| array_length = sizeof(ai4_satd_8x8) / sizeof(WORD32); |
| ihevce_merge_sort( |
| &ai4_satd_8x8[0], aai4_sort_4_16_satd, array_length, 1, 16); |
| } |
| |
| /* Populate avg satd to calculate modulation index and activity factors */ |
| /* 16x16 */ |
| for(i = 0; i < 4; i++) |
| { |
| for(j = 0; j < 4; j++) |
| { |
| WORD32 satd_sum = ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j]; |
| WORD32 satd_min = aai4_sort_4_16_64_satd[0][i * 16 + j * 4 + MEDIAN_CU_TU]; |
| |
| ASSERT(-2 != satd_sum); |
| ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] = satd_min; |
| |
| if(-1 != satd_sum) |
| { |
| ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = satd_sum; |
| ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = satd_min; |
| |
| u8_frame_8x8_sum_act_sqr += (satd_sum * satd_sum); |
| ai4_frame_8x8_sum_act[0] += satd_sum; |
| ai8_frame_8x8_sum_act_sqr[0] += (satd_sum * satd_sum); |
| ai4_frame_8x8_sum_blks[0] += 1; |
| ai4_frame_8x8_sum_act[1] += satd_min; |
| ai8_frame_8x8_sum_act_sqr[1] += (satd_min * satd_min); |
| ai4_frame_8x8_sum_blks[1] += 1; |
| } |
| else |
| { |
| ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = -1; |
| ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = -1; |
| b8_satd_eval[i] = 0; |
| } |
| } |
| |
| if(b8_satd_eval[i]) |
| { |
| ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = ai4_satd_16x16[i]; |
| ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = aai4_sort_4_16_satd[0][i * 4 + MEDIAN_CU_TU]; |
| ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = aai4_sort_4_16_64_satd[1][i * 16 + MEDIAN_CU_TU_BY_2]; |
| |
| for(k = 0; k < 3; k++) |
| { |
| WORD32 satd = ps_ed_ctb_curr_l1->i4_16x16_satd[i][k]; |
| |
| ai4_frame_16x16_sum_act[k] += satd; |
| ai8_frame_16x16_sum_act_sqr[k] += (satd * satd); |
| ai4_frame_16x16_sum_blks[k] += 1; |
| } |
| } |
| else |
| { |
| ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = -1; |
| ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = -1; |
| ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = -1; |
| } |
| } |
| |
| /*32x32*/ |
| if(b8_satd_eval[0] && b8_satd_eval[1] && b8_satd_eval[2] && b8_satd_eval[3]) |
| { |
| WORD32 aai4_sort_4_satd[1][64]; |
| WORD32 array_length = sizeof(ai4_satd_16x16) / sizeof(WORD32); |
| WORD32 satd; |
| |
| /* Sort 4 elements in ascending order */ |
| ihevce_merge_sort(ai4_satd_16x16, aai4_sort_4_satd, array_length, 1, 4); |
| |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = aai4_sort_4_satd[0][MEDIAN_CU_TU]; |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = aai4_sort_4_16_satd[1][MEDIAN_CU_TU_BY_2]; |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = aai4_sort_4_16_64_satd[2][MEDIAN_CU_TU_BY_4]; |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = i4_satd_32x32; |
| |
| for(k = 0; k < 3; k++) |
| { |
| WORD32 satd = ps_ed_ctb_curr_l1->i4_32x32_satd[0][k]; |
| |
| ai4_frame_32x32_sum_act[k] += satd; |
| ai8_frame_32x32_sum_act_sqr[k] += (satd * satd); |
| ai4_frame_32x32_sum_blks[k] += 1; |
| } |
| } |
| else |
| { |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = -1; |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = -1; |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = -1; |
| ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = -1; |
| } |
| } |
| } |
| |
| for(i = 0; i < 2; i++) |
| { |
| /*8x8*/ |
| #if USE_SQRT_AVG_OF_SATD_SQR |
| ps_curr_out->i8_curr_frame_8x8_sum_act[i] = ai8_frame_8x8_sum_act_sqr[i]; |
| #else |
| ps_curr_out->i8_curr_frame_8x8_sum_act[i] = ai4_frame_8x8_sum_act[i]; |
| #endif |
| ps_curr_out->i4_curr_frame_8x8_sum_act_for_strength[i] = ai4_frame_8x8_sum_act[i]; |
| ps_curr_out->i4_curr_frame_8x8_num_blks[i] = ai4_frame_8x8_sum_blks[i]; |
| ps_curr_out->u8_curr_frame_8x8_sum_act_sqr = u8_frame_8x8_sum_act_sqr; |
| |
| /*16x16*/ |
| #if USE_SQRT_AVG_OF_SATD_SQR |
| ps_curr_out->i8_curr_frame_16x16_sum_act[i] = ai8_frame_16x16_sum_act_sqr[i]; |
| #else |
| ps_curr_out->i8_curr_frame_16x16_sum_act[i] = ai4_frame_16x16_sum_act[i]; |
| #endif |
| ps_curr_out->i4_curr_frame_16x16_num_blks[i] = ai4_frame_16x16_sum_blks[i]; |
| |
| /*32x32*/ |
| #if USE_SQRT_AVG_OF_SATD_SQR |
| ps_curr_out->i8_curr_frame_32x32_sum_act[i] = ai8_frame_32x32_sum_act_sqr[i]; |
| #else |
| ps_curr_out->i8_curr_frame_32x32_sum_act[i] = ai4_frame_32x32_sum_act[i]; |
| #endif |
| ps_curr_out->i4_curr_frame_32x32_num_blks[i] = ai4_frame_32x32_sum_blks[i]; |
| } |
| |
| /*16x16*/ |
| #if USE_SQRT_AVG_OF_SATD_SQR |
| ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai8_frame_16x16_sum_act_sqr[2]; |
| #else |
| ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai4_frame_16x16_sum_act[2]; |
| #endif |
| ps_curr_out->i4_curr_frame_16x16_num_blks[2] = ai4_frame_16x16_sum_blks[2]; |
| |
| /*32x32*/ |
| #if USE_SQRT_AVG_OF_SATD_SQR |
| ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai8_frame_32x32_sum_act_sqr[2]; |
| #else |
| ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai4_frame_32x32_sum_act[2]; |
| #endif |
| ps_curr_out->i4_curr_frame_32x32_num_blks[2] = ai4_frame_32x32_sum_blks[2]; |
| } |
| |
| /*! |
| ************************************************************************ |
| * @brief |
| * accumulate L1 intra satd across all threads. |
| * Note: call to this function has to be made after all threads have |
| * finished preintra processing |
| * |
| ************************************************************************ |
| */ |
| LWORD64 ihevce_decomp_pre_intra_get_frame_satd(void *pv_ctxt, WORD32 *wd, WORD32 *ht) |
| { |
| ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
| ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0]; |
| LWORD64 satd_sum = ps_ctxt->ps_ed_ctxt->i8_sum_best_satd; |
| WORD32 i; |
| |
| *wd = ps_ctxt->as_layers[1].i4_actual_wd; |
| *ht = ps_ctxt->as_layers[1].i4_actual_ht; |
| for(i = 1; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
| { |
| ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i]; |
| satd_sum += ps_ctxt->ps_ed_ctxt->i8_sum_best_satd; |
| } |
| |
| return satd_sum; |
| } |
| |
| LWORD64 ihevce_decomp_pre_intra_get_frame_satd_squared(void *pv_ctxt, WORD32 *wd, WORD32 *ht) |
| { |
| ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
| ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0]; |
| LWORD64 satd_sum = ps_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd; |
| WORD32 i; |
| |
| *wd = ps_ctxt->as_layers[1].i4_actual_wd; |
| *ht = ps_ctxt->as_layers[1].i4_actual_ht; |
| for(i = 1; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
| { |
| ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i]; |
| satd_sum += ps_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd; |
| } |
| |
| return satd_sum; |
| } |