| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| |
| /*! |
| ****************************************************************************** |
| * \file ihevce_coarse_me_pass.c |
| * |
| * \brief |
| * Converts the language of the encoder to language of me. This is an i/f |
| * between the encoder style APIs and ME style APIs. This is basically |
| * a memoryless glue layer. |
| * |
| * \date |
| * 22/10/2012 |
| * |
| * \author |
| * Ittiam |
| * |
| * |
| * List of Functions |
| * |
| * |
| ****************************************************************************** |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <math.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_bs_compute_ctb.h" |
| #include "ihevce_global_tables.h" |
| #include "ihevce_dep_mngr_interface.h" |
| #include "hme_datatype.h" |
| #include "hme_interface.h" |
| #include "hme_common_defs.h" |
| #include "hme_defs.h" |
| #include "ihevce_me_instr_set_router.h" |
| #include "ihevce_ipe_instr_set_router.h" |
| #include "ihevce_ipe_structs.h" |
| #include "hme_globals.h" |
| #include "hme_utils.h" |
| #include "hme_coarse.h" |
| #include "hme_refine.h" |
| #include "ihevce_me_pass.h" |
| #include "ihevce_coarse_me_pass.h" |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_get_num_mem_recs \endif |
| * |
| * \brief |
| * Number of memory records are returned for ME module |
| * Note : Include total mem. req. for HME + Total mem. req. for Dep Mngr for HME |
| * |
| * \return |
| * Number of memory records |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| WORD32 ihevce_coarse_me_get_num_mem_recs() |
| { |
| WORD32 hme_mem_recs = hme_coarse_num_alloc(); |
| WORD32 hme_dep_mngr_mem_recs = hme_coarse_dep_mngr_num_alloc(); |
| |
| return ((hme_mem_recs + hme_dep_mngr_mem_recs)); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_get_mem_recs \endif |
| * |
| * \brief |
| * Memory requirements are returned for coarse ME. |
| * |
| * \param[in,out] ps_mem_tab : pointer to memory descriptors table |
| * \param[in] ps_init_prms : Create time static parameters |
| * \param[in] i4_num_proc_thrds : Number of processing threads for this module |
| * \param[in] i4_mem_space : memspace in whihc memory request should be done |
| * |
| * \return |
| * Number of records |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| WORD32 ihevce_coarse_me_get_mem_recs( |
| iv_mem_rec_t *ps_mem_tab, |
| ihevce_static_cfg_params_t *ps_init_prms, |
| WORD32 i4_num_proc_thrds, |
| WORD32 i4_mem_space, |
| WORD32 i4_resolution_id) |
| { |
| hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; |
| WORD32 n_tabs, i; |
| |
| /* Init prms structure specific to HME */ |
| hme_init_prms_t s_hme_init_prms; |
| |
| //return (ihevce_coarse_me_get_num_mem_recs()); |
| /*************************************************************************/ |
| /* code flow: we call hme alloc function and then remap those memtabs */ |
| /* to a different type of memtab structure. */ |
| /*************************************************************************/ |
| ASSERT(HME_COARSE_TOT_MEMTABS >= hme_coarse_num_alloc()); |
| |
| /*************************************************************************/ |
| /* POPULATE THE HME INIT PRMS */ |
| /*************************************************************************/ |
| ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id); |
| |
| /*************************************************************************/ |
| /* CALL THE ME FUNCTION TO GET MEMTABS */ |
| /*************************************************************************/ |
| n_tabs = hme_coarse_alloc(&as_memtabs[0], &s_hme_init_prms); |
| ASSERT(n_tabs == hme_coarse_num_alloc()); |
| |
| /*************************************************************************/ |
| /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE */ |
| /*************************************************************************/ |
| for(i = 0; i < n_tabs; i++) |
| { |
| ps_mem_tab[i].i4_mem_size = as_memtabs[i].size; |
| ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align; |
| ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
| ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t); |
| } |
| |
| /*************************************************************************/ |
| /* --- HME Coarse sync Dep Mngr Mem requests -- */ |
| /*************************************************************************/ |
| { |
| WORD32 n_dep_tabs; |
| |
| ps_mem_tab += n_tabs; |
| |
| n_dep_tabs = hme_coarse_dep_mngr_alloc( |
| ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id); |
| |
| ASSERT(n_dep_tabs == hme_coarse_dep_mngr_num_alloc()); |
| |
| /* Update the total no. of mem tabs */ |
| n_tabs += n_dep_tabs; |
| } |
| |
| return (n_tabs); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_init \endif |
| * |
| * \brief |
| * Intialization for ME context state structure . |
| * |
| * \param[in] ps_mem_tab : pointer to memory descriptors table |
| * \param[in] ps_init_prms : Create time static parameters |
| * \param[in] pv_osal_handle : Osal handle |
| * |
| * \return |
| * Handle to the ME context |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void *ihevce_coarse_me_init( |
| iv_mem_rec_t *ps_mem_tab, |
| ihevce_static_cfg_params_t *ps_init_prms, |
| WORD32 i4_num_proc_thrds, |
| void *pv_osal_handle, |
| WORD32 i4_resolution_id, |
| UWORD8 u1_is_popcnt_available) |
| { |
| /* ME handle to be returned */ |
| void *pv_me_ctxt; |
| WORD32 status; |
| coarse_me_master_ctxt_t *ps_ctxt; |
| |
| /* Init prms structure specific to HME */ |
| hme_init_prms_t s_hme_init_prms; |
| |
| /* memtabs to be passed to hme */ |
| hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; |
| WORD32 n_tabs, n_dep_tabs, i; |
| |
| /*************************************************************************/ |
| /* POPULATE THE HME INIT PRMS */ |
| /*************************************************************************/ |
| ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id); |
| |
| /*************************************************************************/ |
| /* Ensure local declaration is sufficient */ |
| /*************************************************************************/ |
| n_tabs = hme_coarse_num_alloc(); |
| ASSERT(HME_COARSE_TOT_MEMTABS >= n_tabs); |
| |
| /*************************************************************************/ |
| /* MAP RESULTS TO HME MEMTAB STRUCTURE */ |
| /*************************************************************************/ |
| for(i = 0; i < n_tabs; i++) |
| { |
| as_memtabs[i].size = ps_mem_tab[i].i4_mem_size; |
| as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment; |
| as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base; |
| } |
| /*************************************************************************/ |
| /* CALL THE ME FUNCTION TO GET MEMTABS */ |
| /*************************************************************************/ |
| pv_me_ctxt = (void *)as_memtabs[0].pu1_mem; |
| status = hme_coarse_init(pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms); |
| ps_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| if(status == -1) |
| return NULL; |
| |
| /*************************************************************************/ |
| /* --- HME sync Dep Mngr Mem init -- */ |
| /*************************************************************************/ |
| |
| ps_mem_tab += n_tabs; |
| |
| n_dep_tabs = hme_coarse_dep_mngr_init( |
| ps_mem_tab, ps_init_prms, pv_me_ctxt, pv_osal_handle, i4_num_proc_thrds, i4_resolution_id); |
| ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc()); |
| |
| n_tabs += n_dep_tabs; |
| |
| ihevce_me_instr_set_router( |
| (ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list, |
| ps_init_prms->e_arch_type); |
| |
| ihevce_cmn_utils_instr_set_router( |
| &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); |
| |
| return (pv_me_ctxt); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_reg_thrds_sem \endif |
| * |
| * \brief |
| * Intialization for ME context state structure with semaphores . |
| * |
| * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt |
| * \param[in] ppv_sem_hdls : Array of semaphore handles |
| * \param[in] i4_num_proc_thrds : Number of processing threads |
| * |
| * \return |
| * none |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) |
| { |
| hme_coarse_dep_mngr_reg_sem(pv_me_ctxt, ppv_sem_hdls, i4_num_proc_thrds); |
| |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_delete \endif |
| * |
| * \brief |
| * Destroy Coarse ME module |
| * Note : Only Destroys the resources allocated in the module like |
| * semaphore,etc. Memory free is done Separately using memtabs |
| * |
| * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt |
| * \param[in] ps_init_prms : Create time static parameters |
| * \param[in] pv_osal_handle : Osal handle |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_delete( |
| void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id) |
| { |
| /* --- HME sync Dep Mngr Delete --*/ |
| hme_coarse_dep_mngr_delete(pv_me_ctxt, ps_init_prms, i4_resolution_id); |
| } |
| |
| /** |
| ******************************************************************************* |
| * \if Function name : ihevce_coarse_me_set_resolution \endif |
| * |
| * \brief |
| * Sets the resolution for ME state |
| * |
| * \par Description: |
| * ME requires information of resolution to prime up its layer descriptors |
| * and contexts. This API is called whenever a control call from application |
| * causes a change of resolution. Has to be called once initially before |
| * processing any frame. Again this is just a glue function and calls the |
| * actual ME API for the same. |
| * |
| * \param[in,out] pv_me_ctxt: Handle to the ME context |
| * \param[in] n_enc_layers: Number of layers getting encoded |
| * \param[in] p_wd : Pointer containing widths of each layer getting encoded. |
| * \param[in] p_ht : Pointer containing heights of each layer getting encoded. |
| * |
| * \returns |
| * none |
| * |
| * \author |
| * Ittiam |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_coarse_me_set_resolution( |
| void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht) |
| { |
| /* local variables */ |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| WORD32 thrds; |
| |
| for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) |
| { |
| coarse_me_ctxt_t *ps_me_thrd_ctxt; |
| |
| ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds]; |
| |
| hme_coarse_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht); |
| } |
| } |
| void ihevce_coarse_me_get_rc_param( |
| void *pv_me_ctxt, |
| LWORD64 *i8_acc_frame_hme_cost, |
| LWORD64 *i8_acc_frame_hme_sad, |
| LWORD64 *i8_acc_num_blks_higher_sad, |
| LWORD64 *i8_total_blks, |
| WORD32 i4_is_prev_pic_same_scene) |
| { |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| WORD32 thrds; |
| coarse_me_ctxt_t *ps_me_thrd_ctxt; |
| |
| *i8_acc_frame_hme_cost = 0; |
| *i8_acc_frame_hme_sad = 0; |
| |
| for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) |
| { |
| ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds]; |
| *i8_acc_frame_hme_cost += ps_me_thrd_ctxt->i4_L1_hme_best_cost; |
| |
| /*Calculate me cost wrt. to ref only for P frame */ |
| if(ps_me_thrd_ctxt->s_frm_prms.is_i_pic == ps_me_thrd_ctxt->s_frm_prms.bidir_enabled) |
| { |
| *i8_acc_num_blks_higher_sad += ps_me_thrd_ctxt->i4_num_blks_high_sad; |
| *i8_total_blks += ps_me_thrd_ctxt->i4_num_blks; |
| } |
| |
| *i8_acc_frame_hme_sad += ps_me_thrd_ctxt->i4_L1_hme_sad; |
| } |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_process \endif |
| * |
| * \brief |
| * Frame level ME function |
| * |
| * \par Description: |
| * Processing of all layers starting from coarse and going |
| * to the refinement layers, except enocde layer |
| * |
| * \param[in] pv_ctxt : pointer to ME module |
| * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer) |
| * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer) |
| * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer) |
| * \param[in] pd_intra_costs : pointerto intra cost buffer |
| * \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt |
| * \param[in] thrd_id : Thread id of the current thrd in which function is executed |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_process( |
| void *pv_me_ctxt, |
| ihevce_lap_enc_buf_t *ps_enc_lap_inp, |
| multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
| WORD32 thrd_id, |
| WORD32 i4_ping_pong) |
| |
| { |
| /* local variables */ |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| coarse_me_ctxt_t *ps_thrd_ctxt; |
| |
| /* get the current thread ctxt pointer */ |
| ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id]; |
| ps_thrd_ctxt->thrd_id = thrd_id; |
| |
| /* frame level processing function */ |
| hme_coarse_process_frm( |
| (void *)ps_thrd_ctxt, |
| &ps_master_ctxt->s_ref_map, |
| &ps_master_ctxt->s_frm_prms, |
| ps_multi_thrd_ctxt, |
| i4_ping_pong, |
| &ps_master_ctxt->apv_dep_mngr_hme_sync[0]); |
| |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_frame_end \endif |
| * |
| * \brief |
| * End of frame update function performs |
| * - GMV collation |
| * - Dynamic Search Range collation |
| * |
| * \param[in] pv_ctxt : pointer to ME module |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_frame_end(void *pv_me_ctxt) |
| { |
| /* local variables */ |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| coarse_me_ctxt_t *ps_thrd0_ctxt; |
| layer_ctxt_t *ps_curr_layer; |
| WORD32 num_ref, num_thrds, cur_poc; |
| WORD32 coarse_layer_id; |
| WORD32 i4_num_ref; |
| ME_QUALITY_PRESETS_T e_me_quality_preset; |
| |
| /* GMV collation is done for coarse Layer only */ |
| ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| coarse_layer_id = ps_thrd0_ctxt->num_layers - 1; |
| ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[coarse_layer_id]; |
| i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref; |
| e_me_quality_preset = ps_thrd0_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; |
| |
| /* No processing is required if current pic is I pic */ |
| if(1 == ps_master_ctxt->s_frm_prms.is_i_pic) |
| { |
| return; |
| } |
| |
| /* use thrd 0 ctxt to collate the GMVs histogram and Dynamic Search Range */ |
| /* across all threads */ |
| for(num_ref = 0; num_ref < i4_num_ref; num_ref++) |
| { |
| WORD32 i4_offset, i4_lobe_size, i4_layer_id; |
| mv_hist_t *ps_hist_thrd0; |
| dyn_range_prms_t *aps_dyn_range_prms_thrd0[MAX_NUM_LAYERS]; |
| |
| ps_hist_thrd0 = ps_thrd0_ctxt->aps_mv_hist[num_ref]; |
| |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) |
| { |
| for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
| { |
| aps_dyn_range_prms_thrd0[i4_layer_id] = |
| &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; |
| } |
| } |
| |
| i4_lobe_size = ps_hist_thrd0->i4_lobe1_size; |
| i4_offset = i4_lobe_size >> 1; |
| |
| /* run a loop over all the other threads to add up the histogram */ |
| /* and to update the dynamical search range */ |
| for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
| { |
| dyn_range_prms_t *ps_dyn_range_prms; |
| |
| if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
| { |
| mv_hist_t *ps_hist; |
| WORD32 i4_y, i4_x; |
| /* get current thrd histogram pointer */ |
| ps_hist = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_mv_hist[num_ref]; |
| |
| /* Accumalate the Bin count for all the thread */ |
| for(i4_y = 0; i4_y < ps_hist_thrd0->i4_num_rows; i4_y++) |
| { |
| for(i4_x = 0; i4_x < ps_hist_thrd0->i4_num_cols; i4_x++) |
| { |
| S32 i4_bin_id; |
| |
| i4_bin_id = i4_x + (i4_y * ps_hist_thrd0->i4_num_cols); |
| |
| ps_hist_thrd0->ai4_bin_count[i4_bin_id] += |
| ps_hist->ai4_bin_count[i4_bin_id]; |
| } |
| } |
| } |
| |
| /* Update the dynamical search range for each Layer */ |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) |
| { |
| for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
| { |
| /* get current thrd, layer dynamical search range param. pointer */ |
| ps_dyn_range_prms = |
| &ps_master_ctxt->aps_me_ctxt[num_thrds] |
| ->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; |
| /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */ |
| hme_update_dynamic_search_params( |
| aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_max_y); |
| |
| hme_update_dynamic_search_params( |
| aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_min_y); |
| } |
| } |
| } |
| } |
| |
| /*************************************************************************/ |
| /* Get the MAX/MIN per POC distance based on the all the ref. pics */ |
| /*************************************************************************/ |
| /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
| if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) |
| { |
| WORD32 i4_layer_id; |
| cur_poc = ps_thrd0_ctxt->i4_curr_poc; |
| |
| for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
| { |
| ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 0; |
| ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 0; |
| } |
| |
| for(num_ref = 0; num_ref < i4_num_ref; num_ref++) |
| { |
| for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
| { |
| WORD16 i2_mv_per_poc; |
| WORD32 ref_poc, poc_diff; |
| dyn_range_prms_t *ps_dyn_range_prms_thrd0; |
| |
| ps_dyn_range_prms_thrd0 = |
| &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; |
| |
| ref_poc = ps_dyn_range_prms_thrd0->i4_poc; |
| ASSERT(ref_poc < cur_poc); |
| poc_diff = (cur_poc - ref_poc); |
| |
| /* cur. ref. pic. max y per POC */ |
| i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff; |
| /* update the max y per POC */ |
| ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = |
| MAX(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id], |
| i2_mv_per_poc); |
| |
| /* cur. ref. pic. min y per POC */ |
| i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff; |
| /* update the min y per POC */ |
| ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = |
| MIN(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id], |
| i2_mv_per_poc); |
| } |
| } |
| |
| /*************************************************************************/ |
| /* Populate the results to all thread ctxt */ |
| /*************************************************************************/ |
| for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
| { |
| for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
| { |
| ps_master_ctxt->aps_me_ctxt[num_thrds] |
| ->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = |
| ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id]; |
| |
| ps_master_ctxt->aps_me_ctxt[num_thrds] |
| ->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = |
| ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id]; |
| } |
| } |
| } |
| |
| if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
| { |
| /* call the function which calcualtes the GMV */ |
| /* layer pointer is shared across all threads */ |
| /* hence all threads will have access to updated */ |
| /* GMVs populated using thread 0 ctxt */ |
| for(num_ref = 0; num_ref < i4_num_ref; num_ref++) |
| { |
| hme_calculate_global_mv( |
| ps_thrd0_ctxt->aps_mv_hist[num_ref], |
| &ps_curr_layer->s_global_mv[num_ref][GMV_THICK_LOBE], |
| GMV_THICK_LOBE); |
| } |
| } |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_frame_dpb_update \endif |
| * |
| * \brief |
| * Frame level ME initialisation function |
| * |
| * \par Description: |
| * Updation of ME's internal DPB |
| * based on available ref list information |
| * |
| * \param[in] pv_ctxt : pointer to ME module |
| * \param[in] num_ref_l0 : Number of reference pics in L0 list |
| * \param[in] num_ref_l1 : Number of reference pics in L1 list |
| * \param[in] pps_rec_list_l0 : List of recon pics in L0 list |
| * \param[in] pps_rec_list_l1 : List of recon pics in L1 list |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_frame_dpb_update( |
| void *pv_me_ctxt, |
| WORD32 num_ref_l0, |
| WORD32 num_ref_l1, |
| recon_pic_buf_t **pps_rec_list_l0, |
| recon_pic_buf_t **pps_rec_list_l1) |
| { |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| coarse_me_ctxt_t *ps_thrd0_ctxt; |
| WORD32 a_pocs_buffered_in_me[MAX_NUM_REF + 1]; |
| WORD32 a_pocs_to_remove[MAX_NUM_REF + 2]; |
| WORD32 poc_remove_id = 0; |
| WORD32 i, count; |
| |
| /* All processing done using shared / common memory across */ |
| /* threads is done using thrd ctxt */ |
| ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| |
| /*************************************************************************/ |
| /* Updation of ME's DPB list. This involves the following steps: */ |
| /* 1. Obtain list of active POCs maintained within ME. */ |
| /* 2. Search each of them in the ref list. Whatever is not found goes to */ |
| /* the list to be removed. Note: a_pocs_buffered_in_me holds the */ |
| /* currently active POC list within ME. a_pocs_to_remove holds the */ |
| /* list of POCs to be removed, terminated by -1. */ |
| /*************************************************************************/ |
| hme_coarse_get_active_pocs_list((void *)ps_thrd0_ctxt, a_pocs_buffered_in_me); |
| |
| count = 0; |
| while(a_pocs_buffered_in_me[count] != -1) |
| { |
| WORD32 poc_to_search = a_pocs_buffered_in_me[count]; |
| WORD32 match_found_flag = 0; |
| |
| /*********************************************************************/ |
| /* Search in any one list (L0/L1) since both lists contain all the */ |
| /* active ref pics. */ |
| /*********************************************************************/ |
| for(i = 0; i < num_ref_l0; i++) |
| { |
| if(poc_to_search == pps_rec_list_l0[i]->i4_poc) |
| { |
| match_found_flag = 1; |
| break; |
| } |
| } |
| for(i = 0; i < num_ref_l1; i++) |
| { |
| if(poc_to_search == pps_rec_list_l1[i]->i4_poc) |
| { |
| match_found_flag = 1; |
| break; |
| } |
| } |
| |
| if(0 == match_found_flag) |
| { |
| /*****************************************************************/ |
| /* POC buffered inside ME but not part of ref list given by DPB */ |
| /* Hence this needs to be flagged to ME for removal. */ |
| /*****************************************************************/ |
| a_pocs_to_remove[poc_remove_id] = poc_to_search; |
| poc_remove_id++; |
| } |
| count++; |
| } |
| |
| /* List termination */ |
| a_pocs_to_remove[poc_remove_id] = -1; |
| |
| /* Call the ME API to remove "outdated" POCs */ |
| hme_coarse_discard_frm(ps_thrd0_ctxt, a_pocs_to_remove); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_frame_init \endif |
| * |
| * \brief |
| * Coarse Frame level ME initialisation function |
| * |
| * \par Description: |
| * The following pre-conditions exist for this function: a. We have the input |
| * pic ready for encode, b. We have the reference list with POC, L0/L1 IDs |
| * and ref ptrs ready for this picture and c. ihevce_me_set_resolution has |
| * been called atleast once. Once these are supplied, the following are |
| * done here: a. Input pyramid creation, b. Updation of ME's internal DPB |
| * based on available ref list information |
| * |
| * \param[in] pv_ctxt : pointer to ME module |
| * \param[in] ps_frm_ctb_prms : CTB characteristics parameters |
| * \param[in] ps_frm_lamda : Frame level Lambda params |
| * \param[in] num_ref_l0 : Number of reference pics in L0 list |
| * \param[in] num_ref_l1 : Number of reference pics in L1 list |
| * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0) |
| * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1) |
| * \param[in] pps_rec_list_l0 : List of recon pics in L0 list |
| * \param[in] pps_rec_list_l1 : List of recon pics in L1 list |
| * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer) |
| * \param[in] i4_frm_qp : current picture QP |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_frame_init( |
| void *pv_me_ctxt, |
| ihevce_static_cfg_params_t *ps_stat_prms, |
| frm_ctb_ctxt_t *ps_frm_ctb_prms, |
| frm_lambda_ctxt_t *ps_frm_lamda, |
| WORD32 num_ref_l0, |
| WORD32 num_ref_l1, |
| WORD32 num_ref_l0_active, |
| WORD32 num_ref_l1_active, |
| recon_pic_buf_t **pps_rec_list_l0, |
| recon_pic_buf_t **pps_rec_list_l1, |
| ihevce_lap_enc_buf_t *ps_enc_lap_inp, |
| WORD32 i4_frm_qp, |
| ihevce_ed_blk_t *ps_layer1_buf, //EIID |
| ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
| UWORD8 *pu1_me_reverse_map_info, |
| WORD32 i4_temporal_layer_id) |
| { |
| /* local variables */ |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| coarse_me_ctxt_t *ps_ctxt; |
| coarse_me_ctxt_t *ps_thrd0_ctxt; |
| WORD32 inp_poc, num_ref; |
| WORD32 i; |
| |
| /* Input POC is derived from input buffer */ |
| inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc; |
| num_ref = num_ref_l0 + num_ref_l1; |
| |
| /* All processing done using shared / common memory across */ |
| /* threads is done using thrd 0 ctxt */ |
| ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| |
| ps_master_ctxt->s_frm_prms.u1_num_active_ref_l0 = num_ref_l0_active; |
| ps_master_ctxt->s_frm_prms.u1_num_active_ref_l1 = num_ref_l1_active; |
| |
| /* store the frm ctb ctxt to all the thrd ctxt */ |
| { |
| WORD32 num_thrds; |
| |
| /* initialise the parameters for all the threads */ |
| for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
| { |
| ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
| ps_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms; |
| /*EIID: early decision buffer pointer */ |
| ps_ctxt->ps_ed_blk = ps_layer1_buf; |
| ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1; |
| |
| /* weighted pred enable flag */ |
| ps_ctxt->i4_wt_pred_enable_flag = ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag | |
| ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag; |
| |
| if(1 == ps_ctxt->i4_wt_pred_enable_flag) |
| { |
| /* log2 weight denom */ |
| ps_ctxt->s_wt_pred.wpred_log_wdc = |
| ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom; |
| } |
| else |
| { |
| /* default value */ |
| ps_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT; |
| } |
| ps_ctxt->i4_L1_hme_best_cost = 0; |
| ps_ctxt->i4_L1_hme_sad = 0; |
| ps_ctxt->i4_num_blks_high_sad = 0; |
| ps_ctxt->i4_num_blks = 0; |
| |
| ps_ctxt->pv_me_optimised_function_list = ps_master_ctxt->pv_me_optimised_function_list; |
| ps_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func; |
| } |
| } |
| /* Create the reference map for ME */ |
| ihevce_me_create_ref_map( |
| pps_rec_list_l0, |
| pps_rec_list_l1, |
| num_ref_l0_active, |
| num_ref_l1_active, |
| num_ref, |
| &ps_master_ctxt->s_ref_map); |
| /*************************************************************************/ |
| /* Call the ME frame level processing for further actiion. */ |
| /* ToDo: Support Row Level API. */ |
| /*************************************************************************/ |
| ps_master_ctxt->s_frm_prms.i2_mv_range_x = ps_thrd0_ctxt->s_init_prms.max_horz_search_range; |
| ps_master_ctxt->s_frm_prms.i2_mv_range_y = ps_thrd0_ctxt->s_init_prms.max_vert_search_range; |
| |
| ps_master_ctxt->s_frm_prms.is_i_pic = 0; |
| ps_master_ctxt->s_frm_prms.i4_temporal_layer_id = i4_temporal_layer_id; |
| |
| ps_master_ctxt->s_frm_prms.is_pic_second_field = |
| (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^ |
| ps_enc_lap_inp->s_input_buf.i4_topfield_first)); |
| { |
| S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type; |
| |
| /*********************************************************************/ |
| /* For I Pic, we do not call update fn at ctb level, instead we do */ |
| /* one shot update for entire picture. */ |
| /*********************************************************************/ |
| if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME)) |
| { |
| ps_master_ctxt->s_frm_prms.is_i_pic = 1; |
| ps_master_ctxt->s_frm_prms.bidir_enabled = 0; |
| } |
| else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME)) |
| { |
| ps_master_ctxt->s_frm_prms.bidir_enabled = 0; |
| } |
| else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME)) |
| { |
| ps_master_ctxt->s_frm_prms.bidir_enabled = 1; |
| } |
| else |
| { |
| /* not sure whether we need to handle mixed frames like IP, */ |
| /* they should ideally come as single field. */ |
| /* TODO : resolve thsi ambiguity */ |
| ASSERT(0); |
| } |
| } |
| /************************************************************************/ |
| /* Lambda calculations moved outside ME and to one place, so as to have */ |
| /* consistent lambda across ME, IPE, CL RDOPT etc */ |
| /************************************************************************/ |
| |
| { |
| #define CLIP3_F(min, max, val) (((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val))) |
| double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 }; |
| double d_b_pic_factor; |
| double d_q_factor; |
| //double d_lambda; |
| UWORD8 u1_temp_hier = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id; |
| |
| if(u1_temp_hier) |
| { |
| d_b_pic_factor = CLIP3_F(2.0, 4.0, (i4_frm_qp - 12.0) / 6.0); |
| } |
| else |
| d_b_pic_factor = 1.0; |
| |
| d_q_factor = (1 << (i4_frm_qp / 6)) * q_steps[i4_frm_qp % 6]; |
| ps_master_ctxt->s_frm_prms.qstep = (WORD32)d_q_factor; |
| ps_master_ctxt->s_frm_prms.i4_frame_qp = i4_frm_qp; |
| } |
| |
| /* HME Dependency Manager : Reset the num ctb processed in every row */ |
| /* for ME sync in every layer */ |
| { |
| WORD32 ctr; |
| for(ctr = 1; ctr < ps_thrd0_ctxt->num_layers; ctr++) |
| { |
| void *pv_dep_mngr_state; |
| pv_dep_mngr_state = ps_master_ctxt->apv_dep_mngr_hme_sync[ctr - 1]; |
| |
| ihevce_dmgr_rst_row_row_sync(pv_dep_mngr_state); |
| } |
| } |
| |
| /* Frame level init of all threads of ME */ |
| { |
| WORD32 num_thrds; |
| |
| /* initialise the parameters for all the threads */ |
| for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
| { |
| ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
| |
| hme_coarse_process_frm_init( |
| (void *)ps_ctxt, ps_ctxt->ps_hme_ref_map, ps_ctxt->ps_hme_frm_prms); |
| } |
| } |
| |
| ps_master_ctxt->s_frm_prms.i4_cl_sad_lambda_qf = ps_frm_lamda->i4_cl_sad_lambda_qf; |
| ps_master_ctxt->s_frm_prms.i4_cl_satd_lambda_qf = ps_frm_lamda->i4_cl_satd_lambda_qf; |
| ps_master_ctxt->s_frm_prms.i4_ol_sad_lambda_qf = ps_frm_lamda->i4_ol_sad_lambda_qf; |
| ps_master_ctxt->s_frm_prms.i4_ol_satd_lambda_qf = ps_frm_lamda->i4_ol_satd_lambda_qf; |
| ps_master_ctxt->s_frm_prms.lambda_q_shift = LAMBDA_Q_SHIFT; |
| |
| ps_master_ctxt->s_frm_prms.pf_interp_fxn = NULL; |
| |
| /*************************************************************************/ |
| /* If num ref is 0, that means that it has to be coded as I. Do nothing */ |
| /* However mv bank update needs to happen with "intra" mv. */ |
| /*************************************************************************/ |
| if(ps_master_ctxt->s_ref_map.i4_num_ref == 0 || ps_master_ctxt->s_frm_prms.is_i_pic) |
| { |
| for(i = 1; i < ps_thrd0_ctxt->num_layers; i++) |
| { |
| layer_ctxt_t *ps_layer_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[i]; |
| BLK_SIZE_T e_blk_size; |
| S32 use_4x4; |
| |
| /* The mv bank is filled with "intra" mv */ |
| use_4x4 = hme_get_mv_blk_size( |
| ps_thrd0_ctxt->s_init_prms.use_4x4, |
| i, |
| ps_thrd0_ctxt->num_layers, |
| ps_thrd0_ctxt->u1_encode[i]); |
| e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8; |
| hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]); |
| hme_fill_mvbank_intra(ps_layer_ctxt); |
| |
| /* Clear out the global mvs */ |
| memset( |
| ps_layer_ctxt->s_global_mv, |
| 0, |
| sizeof(hme_mv_t) * ps_thrd0_ctxt->max_num_ref * NUM_GMV_LOBES); |
| } |
| |
| return; |
| } |
| |
| /*************************************************************************/ |
| /* Coarse & refine Layer frm init (layer mem is common across thrds) */ |
| /*************************************************************************/ |
| { |
| coarse_prms_t s_coarse_prms; |
| refine_prms_t s_refine_prms; |
| S16 i2_max; |
| S32 layer_id; |
| |
| layer_id = ps_thrd0_ctxt->num_layers - 1; |
| i2_max = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x; |
| i2_max = MAX(i2_max, ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y); |
| s_coarse_prms.i4_layer_id = layer_id; |
| |
| { |
| S32 log_start_step; |
| /* Based on Preset, set the starting step size for Refinement */ |
| if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets) |
| { |
| log_start_step = 0; |
| } |
| else |
| { |
| log_start_step = 1; |
| } |
| s_coarse_prms.i4_max_iters = i2_max >> log_start_step; |
| s_coarse_prms.i4_start_step = 1 << log_start_step; |
| } |
| s_coarse_prms.i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref; |
| s_coarse_prms.do_full_search = 1; |
| s_coarse_prms.num_results = ps_thrd0_ctxt->max_num_results_coarse; |
| |
| hme_coarse_frm_init(ps_thrd0_ctxt, &s_coarse_prms); |
| |
| layer_id--; |
| |
| /*************************************************************************/ |
| /* This loop will run for all refine layers (non- encode layers) */ |
| /*************************************************************************/ |
| while(layer_id > 0) |
| { |
| layer_ctxt_t *ps_curr_layer; |
| layer_ctxt_t *ps_coarse_layer; |
| |
| ps_coarse_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id + 1]; |
| |
| ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]; |
| |
| hme_set_refine_prms( |
| &s_refine_prms, |
| ps_thrd0_ctxt->u1_encode[layer_id], |
| ps_master_ctxt->s_ref_map.i4_num_ref, |
| layer_id, |
| ps_thrd0_ctxt->num_layers, |
| ps_thrd0_ctxt->num_layers_explicit_search, |
| ps_thrd0_ctxt->s_init_prms.use_4x4, |
| &ps_master_ctxt->s_frm_prms, |
| NULL, |
| &ps_thrd0_ctxt->s_init_prms.s_me_coding_tools); |
| |
| hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer); |
| |
| layer_id--; |
| } |
| } |
| |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_decomp_pre_intra_frame_init \endif |
| * |
| * \brief |
| * Frame Intialization for Decomp intra pre analysis. |
| * |
| * \param[in] pv_ctxt : pointer to module ctxt |
| * \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers |
| * \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| WORD32 ihevce_coarse_me_get_lyr_buf_desc( |
| void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride) |
| { |
| /* local variables */ |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| coarse_me_ctxt_t *ps_thrd0_ctxt; |
| WORD32 lyr_no; |
| layers_descr_t *ps_curr_descr; |
| WORD32 i4_free_idx; |
| |
| /* All processing done using shared / common memory across */ |
| /* threads is done using thrd0 ctxt */ |
| ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| |
| /* Obtain an empty layer descriptor */ |
| i4_free_idx = hme_coarse_find_free_descr_idx((void *)ps_thrd0_ctxt); |
| |
| ps_curr_descr = &ps_thrd0_ctxt->as_ref_descr[i4_free_idx]; |
| |
| /* export all the layer buffers except Layer 0 (encode layer) */ |
| for(lyr_no = 1; lyr_no < ps_thrd0_ctxt->num_layers; lyr_no++) |
| { |
| pi4_lyr_buf_stride[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->i4_inp_stride; |
| ppu1_decomp_lyr_bufs[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->pu1_inp; |
| } |
| |
| return (i4_free_idx); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif |
| * |
| * \brief Returns to the caller key attributes related to dependency between layers |
| * for multi-thread execution |
| * |
| * |
| * \par Description: |
| * This function requires the precondition that the width and ht of encode |
| * layer is known, and ME API ihevce_me_set_resolution() API called with |
| * this info. Based on this, ME populates useful information for the encoder |
| * to execute the multi-thread (concurrent across layers) in this API. |
| * The number of layers, number of vertical units in each layer, and for |
| * each vertial unit in each layer, its dependency on previous layer's units |
| * From ME's perspective, a vertical unit is one which is smallest min size |
| * vertically (and spans the entire row horizontally). This is CTB for encode |
| * layer, and 8x8 / 4x4 for non encode layers. |
| * |
| * \param[in] pv_ctxt : ME handle |
| * \param[in] ps_curr_inp : Input buffer descriptor |
| * \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates) |
| * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each |
| * entry has num vertical units in that particular layer |
| * \param[in] ps_me_job_q_prms : Array of job queue prms, one for each unit in a |
| * layer. Note that this is contiguous in order of processing |
| * All k units of layer N-1 from top to bottom, followed by |
| * all m units of layer N-2 .... ends with X units of layer 0 |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_get_lyr_prms_job_que( |
| void *pv_me_ctxt, |
| ihevce_lap_enc_buf_t *ps_curr_inp, |
| WORD32 *pi4_num_hme_lyrs, |
| WORD32 *pi4_num_vert_units_in_lyr, |
| multi_thrd_me_job_q_prms_t *ps_me_job_q_prms) |
| { |
| coarse_me_ctxt_t *ps_ctxt; |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| |
| /* These arrays and ptrs track input dependencies for units of a layer */ |
| /* This is a ping poing design, while using one part, we update other part */ |
| U08 au1_inp_dep[2][MAX_NUM_VERT_UNITS_FRM]; |
| U08 *pu1_inp_dep_c, *pu1_inp_dep_n; |
| |
| /* Height of current and next layers */ |
| S32 ht_c, ht_n; |
| |
| /* Blk ht at a given layer and next layer*/ |
| S32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n; |
| |
| /* Number of vertical units in current and next layer */ |
| S32 num_vert_c, num_vert_n; |
| |
| S32 ctb_size = 64, num_layers, i, j, k; |
| |
| /* since same layer desc pointer is stored in all thread ctxt */ |
| /* a free idx is obtained using 0th thread ctxt pointer */ |
| ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| |
| /* Set the number of layers */ |
| num_layers = ps_ctxt->num_layers; |
| *pi4_num_hme_lyrs = num_layers; |
| |
| pu1_inp_dep_c = &au1_inp_dep[0][0]; |
| pu1_inp_dep_n = &au1_inp_dep[1][0]; |
| |
| ASSERT(num_layers >= 2); |
| |
| ht_n = ps_ctxt->a_ht[num_layers - 2]; |
| ht_c = ps_ctxt->a_ht[num_layers - 1]; |
| |
| /* compute blk ht and unit ht for c and n */ |
| if(ps_ctxt->u1_encode[num_layers - 1]) |
| { |
| blk_ht_c = 16; |
| unit_ht_c = ctb_size; |
| } |
| else |
| { |
| blk_ht_c = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, num_layers - 1, num_layers, 0); |
| unit_ht_c = blk_ht_c; |
| } |
| |
| num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c; |
| |
| /* For new design in Coarsest HME layer we need */ |
| /* one additional row extra at the end of frame */ |
| /* hence num_vert_c is incremented by 1 */ |
| num_vert_c++; |
| |
| /* Dummy initialization outside loop, not used first time */ |
| memset(pu1_inp_dep_c, 0, num_vert_c); |
| |
| /*************************************************************************/ |
| /* Run through each layer, set the number of vertical units and job queue*/ |
| /* attrs for each vert unit in the layer */ |
| /*************************************************************************/ |
| for(i = num_layers - 1; i > 0; i--) |
| { |
| /* 0th entry is actually layer id num_layers - 1 */ |
| /* and entry num_layers-1 equals the biggest layer (id = 0) */ |
| pi4_num_vert_units_in_lyr[num_layers - 1 - i] = num_vert_c; |
| /* "n" is computed for first time */ |
| ht_n = ps_ctxt->a_ht[i - 1]; |
| blk_ht_n = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, i - 1, num_layers, 0); |
| unit_ht_n = blk_ht_n; |
| if(ps_ctxt->u1_encode[i - 1]) |
| unit_ht_n = ctb_size; |
| |
| num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n; |
| /* Initialize all units' inp dep in next layer to 0 */ |
| memset(pu1_inp_dep_n, 0, num_vert_n * sizeof(U08)); |
| |
| /* Evaluate dependencies for this layer */ |
| for(j = 0; j < num_vert_c; j++) |
| { |
| S32 v1, v2; |
| |
| /* Output dependencies. When one unit in current layer finishes, */ |
| /* how many in the next layer it affects?. Assuming that the top */ |
| /* of this vertical unit and bottom of this vertical unit project*/ |
| /* somewhere in the next layer. The top of this vertical unit */ |
| /* becomes the bottom right point for somebody, and the bottom of*/ |
| /* this vertical unit becomes the colocated pt for somebody, this*/ |
| /* is the extremum. */ |
| |
| /* for the initial unit affected by j in "c" layer, take j-1th */ |
| /* unit top and project it. */ |
| v1 = (j - 1) * unit_ht_c * ht_n; |
| v1 /= (ht_c * unit_ht_n); |
| v1 -= 1; |
| |
| /* for the final unit affected by j in "c" layer, take jth unit */ |
| /* bottom and project it. */ |
| |
| v2 = (j + 1) * unit_ht_c * ht_n; |
| v2 /= (ht_c * unit_ht_n); |
| v2 += 1; |
| |
| /* Clip to be within valid limits */ |
| v1 = HME_CLIP(v1, 0, (num_vert_n - 1)); |
| v2 = HME_CLIP(v2, 0, (num_vert_n - 1)); |
| |
| /* In the layer "n", units starting at offset v1, and upto v2 are*/ |
| /* dependent on unit j of layer "c". So for each of these units */ |
| /* increment the dependency by 1 corresponding to "jth" unit in */ |
| /* layer "c" */ |
| ps_me_job_q_prms->i4_num_output_dep = v2 - v1 + 1; |
| ASSERT(ps_me_job_q_prms->i4_num_output_dep <= MAX_OUT_DEP); |
| for(k = v1; k <= v2; k++) |
| pu1_inp_dep_n[k]++; |
| |
| /* Input dependency would have been calculated in prev run */ |
| ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j]; |
| ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP); |
| |
| /* Offsets */ |
| for(k = v1; k <= v2; k++) |
| ps_me_job_q_prms->ai4_out_dep_unit_off[k - v1] = k; |
| |
| ps_me_job_q_prms++; |
| } |
| |
| /* Compute the blk size and vert unit size in each layer */ |
| /* "c" denotes curr layer, and "n" denotes the layer to which result */ |
| /* is projected to */ |
| ht_c = ht_n; |
| blk_ht_c = blk_ht_n; |
| unit_ht_c = unit_ht_n; |
| num_vert_c = num_vert_n; |
| |
| /* Input dep count for next layer was computed this iteration. */ |
| /* Swap so that p_inp_dep_n becomes current for next iteration, */ |
| /* and p_inp_dep_c will become update area during next iteration */ |
| /* for next to next. */ |
| { |
| U08 *pu1_tmp = pu1_inp_dep_n; |
| pu1_inp_dep_n = pu1_inp_dep_c; |
| pu1_inp_dep_c = pu1_tmp; |
| } |
| } |
| |
| /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */ |
| |
| /* set the numebr of vertical units */ |
| pi4_num_vert_units_in_lyr[num_layers - 1] = num_vert_c; |
| for(j = 0; j < num_vert_c; j++) |
| { |
| /* Here there is no output dependency for ME. However this data is used for encode, */ |
| /* and there is a 1-1 correspondence between this and the encode */ |
| /* Hence we set output dependency of 1 */ |
| ps_me_job_q_prms->i4_num_output_dep = 1; |
| ps_me_job_q_prms->ai4_out_dep_unit_off[0] = j; |
| ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j]; |
| ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP); |
| ps_me_job_q_prms++; |
| } |
| |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_set_lyr1_mv_bank \endif |
| * |
| * \brief |
| * Frame level ME initialisation of MV bank of penultimate layer |
| * |
| * \par Description: |
| * Updates the Layer1 context with the given buffers |
| * |
| * \param[in] pv_me_ctxt : pointer to ME module |
| * \param[in] pu1_mv_bank : MV bank buffer pointer |
| * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_set_lyr1_mv_bank( |
| void *pv_me_ctxt, |
| ihevce_lap_enc_buf_t *ps_enc_lap_inp, |
| void *pv_mv_bank, |
| void *pv_ref_idx_bank, |
| WORD32 i4_curr_idx) |
| { |
| coarse_me_ctxt_t *ps_thrd0_ctxt; |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| layer_ctxt_t *ps_lyr1_ctxt; |
| |
| /* Input descriptor that is updated and passed to ME */ |
| hme_inp_desc_t s_inp_desc; |
| |
| /*************************************************************************/ |
| /* Add the current input to ME's DPB. This will also create the pyramids */ |
| /* for the HME layers tha are not "encoded". */ |
| /*************************************************************************/ |
| s_inp_desc.i4_poc = ps_enc_lap_inp->s_lap_out.i4_poc; |
| s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf; |
| s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf; |
| s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf; |
| |
| s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd; |
| s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd; |
| |
| hme_coarse_add_inp(pv_me_ctxt, &s_inp_desc, i4_curr_idx); |
| |
| /* All processing done using shared / common memory across */ |
| /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */ |
| ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| |
| ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1]; |
| |
| /* register the mv bank & ref idx bank pointer */ |
| ps_lyr1_ctxt->ps_layer_mvbank->pi1_ref_idx_base = (S08 *)pv_ref_idx_bank; |
| ps_lyr1_ctxt->ps_layer_mvbank->ps_mv_base = (hme_mv_t *)pv_mv_bank; |
| |
| return; |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_coarse_me_get_lyr1_ctxt \endif |
| * |
| * \brief |
| * function to get teh Layer 1 properties to be passed on the encode layer |
| * |
| * \par Description: |
| * Ucopies the enitre layer ctxt emory to the destination |
| * |
| * \param[in] pv_me_ctxt : pointer to ME module |
| * \param[in] pu1_mv_bank : MV bank buffer pointer |
| * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer |
| * |
| * \return |
| * None |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_coarse_me_get_lyr1_ctxt( |
| void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt) |
| { |
| coarse_me_ctxt_t *ps_thrd0_ctxt; |
| coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
| layer_ctxt_t *ps_lyr1_ctxt; |
| |
| /* All processing done using shared / common memory across */ |
| /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */ |
| ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
| |
| /* get the context of layer 1 */ |
| ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1]; |
| |
| /* copy the layer ctxt eve registerd mv bank & ref idx bank also goes in */ |
| memcpy(pv_layer_ctxt, ps_lyr1_ctxt, sizeof(layer_ctxt_t)); |
| |
| /* copy the layer mv bank contents */ |
| memcpy(pv_layer_mv_bank_ctxt, ps_lyr1_ctxt->ps_layer_mvbank, sizeof(layer_mv_t)); |
| |
| /* register the MV bank pointer in the layer ctxt*/ |
| ((layer_ctxt_t *)pv_layer_ctxt)->ps_layer_mvbank = (layer_mv_t *)pv_layer_mv_bank_ctxt; |
| |
| return; |
| } |