| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ******************************************************************************* |
| * @file |
| * ihevce_stasino_helpers.c |
| * |
| * @brief |
| * |
| * @author |
| * Ittiam |
| * |
| * @par List of Functions: |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <string.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_stasino_helpers.h" |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function calculates the variance of given data set. |
| * |
| * @par Description: |
| * This function is mainly used to find the variance of the block of pixel values. |
| * The block can be rectangular also. Single pass variance calculation |
| * implementation. |
| * |
| * @param[in] p_input |
| * The input buffer to calculate the variance. |
| * |
| * @param[out] pi4_mean |
| * Pointer ot the mean of the datset |
| * |
| * @param[out] pi4_variance |
| * Pointer tot he variabce of the data set |
| * |
| * @param[in] u1_is_hbd |
| * 1 if the data is in high bit depth |
| * |
| * @param[in] stride |
| * Stride for the input buffer |
| * |
| * @param[in] block_height |
| * height of the pixel block |
| * |
| * @param[in] block_width |
| * width of the pixel block |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_calc_variance( |
| void *pv_input, |
| WORD32 i4_stride, |
| WORD32 *pi4_mean, |
| UWORD32 *pu4_variance, |
| UWORD8 u1_block_height, |
| UWORD8 u1_block_width, |
| UWORD8 u1_is_hbd, |
| UWORD8 u1_disable_normalization) |
| { |
| UWORD8 *pui1_buffer; // pointer for 8 bit usecase |
| WORD32 i, j; |
| WORD32 total_elements; |
| |
| LWORD64 mean; |
| ULWORD64 variance; |
| ULWORD64 sum; |
| ULWORD64 sq_sum; |
| |
| /* intialisation */ |
| total_elements = u1_block_height * u1_block_width; |
| mean = 0; |
| variance = 0; |
| sum = 0; |
| sq_sum = 0; |
| |
| /* handle the case of 8/10 bit depth separately */ |
| if(!u1_is_hbd) |
| { |
| pui1_buffer = (UWORD8 *)pv_input; |
| |
| /* loop over all the values in the block */ |
| for(i = 0; i < u1_block_height; i++) |
| { |
| /* loop over a row in the block */ |
| for(j = 0; j < u1_block_width; j++) |
| { |
| sum += pui1_buffer[i * i4_stride + j]; |
| sq_sum += (pui1_buffer[i * i4_stride + j] * pui1_buffer[i * i4_stride + j]); |
| } |
| } |
| |
| if(!u1_disable_normalization) |
| { |
| mean = sum / total_elements; |
| variance = |
| ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements)); |
| } |
| else |
| { |
| mean = sum; |
| variance = ((total_elements * sq_sum) - (sum * sum)); |
| } |
| } |
| |
| /* copy back the values to the output variables */ |
| *pi4_mean = mean; |
| *pu4_variance = variance; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function calcluates the variance of given data set which is WORD16 |
| * |
| * @par Description: |
| * This function is mainly used to find the variance of the block of pixel values. |
| * Single pass variance calculation implementation. |
| * |
| * @param[in] pv_input |
| * The input buffer to calculate the variance. |
| * |
| * |
| * @param[in] stride |
| * Stride for the input buffer |
| * |
| * @param[out] pi4_mean |
| * Pointer ot the mean of the datset |
| * |
| * @param[out] pi4_variance |
| * Pointer tot he variabce of the data set |
| * |
| * @param[in] block_height |
| * height of the pixel block |
| * |
| * @param[in] block_width |
| * width of the pixel block |
| * |
| * |
| * @remarks |
| * None |
| * |
| *******************************************************************************/ |
| void ihevce_calc_variance_signed( |
| WORD16 *pv_input, |
| WORD32 i4_stride, |
| WORD32 *pi4_mean, |
| UWORD32 *pu4_variance, |
| UWORD8 u1_block_height, |
| UWORD8 u1_block_width) |
| { |
| WORD16 *pi2_buffer; // poinbter for 10 bit use case |
| |
| WORD32 i, j; |
| WORD32 total_elements; |
| |
| LWORD64 mean; |
| LWORD64 variance; |
| LWORD64 sum; |
| LWORD64 sq_sum; |
| |
| /* intialisation */ |
| total_elements = u1_block_height * u1_block_width; |
| mean = 0; |
| variance = 0; |
| sum = 0; |
| sq_sum = 0; |
| |
| pi2_buffer = pv_input; |
| |
| for(i = 0; i < u1_block_height; i++) |
| { |
| for(j = 0; j < u1_block_width; j++) |
| { |
| sum += pi2_buffer[i * i4_stride + j]; |
| sq_sum += (pi2_buffer[i * i4_stride + j] * pi2_buffer[i * i4_stride + j]); |
| } |
| } |
| |
| mean = sum; /// total_elements; |
| variance = ((total_elements * sq_sum) - (sum * sum)); // / (total_elements * (total_elements) ) |
| |
| /* copy back the values to the output variables */ |
| *pi4_mean = mean; |
| *pu4_variance = variance; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function calculates the variance of a chrominance plane for 420SP data |
| * |
| * @par Description: |
| * This function is mainly used to find the variance of the block of pixel values. |
| * The block can be rectangular also. Single pass variance calculation |
| * implementation. |
| * |
| * @param[in] p_input |
| * The input buffer to calculate the variance. |
| * |
| * @param[in] stride |
| * Stride for the input buffer |
| * |
| * @param[out] pi4_mean |
| * Pointer ot the mean of the datset |
| * |
| * @param[out] pi4_variance |
| * Pointer tot he variabce of the data set |
| * |
| * @param[in] block_height |
| * height of the pixel block |
| * |
| * @param[in] block_width |
| * width of the pixel block |
| * |
| * @param[in] u1_is_hbd |
| * 1 if the data is in high bit depth |
| * |
| * @param[in] e_chroma_plane |
| * is U or V |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_calc_chroma_variance( |
| void *pv_input, |
| WORD32 i4_stride, |
| WORD32 *pi4_mean, |
| UWORD32 *pu4_variance, |
| UWORD8 u1_block_height, |
| UWORD8 u1_block_width, |
| UWORD8 u1_is_hbd, |
| CHROMA_PLANE_ID_T e_chroma_plane) |
| { |
| UWORD8 *pui1_buffer; // pointer for 8 bit usecase |
| WORD32 i, j; |
| WORD32 total_elements; |
| |
| LWORD64 mean; |
| ULWORD64 variance; |
| LWORD64 sum; |
| LWORD64 sq_sum; |
| |
| /* intialisation */ |
| total_elements = u1_block_height * u1_block_width; |
| mean = 0; |
| variance = 0; |
| sum = 0; |
| sq_sum = 0; |
| |
| /* handle the case of 8/10 bit depth separately */ |
| if(!u1_is_hbd) |
| { |
| pui1_buffer = (UWORD8 *)pv_input; |
| |
| pui1_buffer += e_chroma_plane; |
| |
| /* loop over all the values in the block */ |
| for(i = 0; i < u1_block_height; i++) |
| { |
| /* loop over a row in the block */ |
| for(j = 0; j < u1_block_width; j++) |
| { |
| sum += pui1_buffer[i * i4_stride + j * 2]; |
| sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]); |
| } |
| } |
| |
| mean = sum / total_elements; |
| variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements)); |
| } |
| |
| /* copy back the values to the output variables */ |
| *pi4_mean = mean; |
| *pu4_variance = variance; |
| } |
| |
| LWORD64 ihevce_inject_stim_into_distortion( |
| void *pv_src, |
| WORD32 i4_src_stride, |
| void *pv_pred, |
| WORD32 i4_pred_stride, |
| LWORD64 i8_distortion, |
| WORD32 i4_alpha_stim_multiplier, |
| UWORD8 u1_blk_size, |
| UWORD8 u1_is_hbd, |
| UWORD8 u1_enable_psyRDOPT, |
| CHROMA_PLANE_ID_T e_chroma_plane) |
| { |
| if(!u1_enable_psyRDOPT) |
| { |
| UWORD32 u4_src_variance; |
| UWORD32 u4_pred_variance; |
| WORD32 i4_mean; |
| WORD32 i4_noise_term; |
| |
| if(NULL_PLANE == e_chroma_plane) |
| { |
| ihevce_calc_variance( |
| pv_src, |
| i4_src_stride, |
| &i4_mean, |
| &u4_src_variance, |
| u1_blk_size, |
| u1_blk_size, |
| u1_is_hbd, |
| 0); |
| |
| ihevce_calc_variance( |
| pv_pred, |
| i4_pred_stride, |
| &i4_mean, |
| &u4_pred_variance, |
| u1_blk_size, |
| u1_blk_size, |
| u1_is_hbd, |
| 0); |
| } |
| else |
| { |
| ihevce_calc_chroma_variance( |
| pv_src, |
| i4_src_stride, |
| &i4_mean, |
| &u4_src_variance, |
| u1_blk_size, |
| u1_blk_size, |
| u1_is_hbd, |
| e_chroma_plane); |
| |
| ihevce_calc_chroma_variance( |
| pv_pred, |
| i4_pred_stride, |
| &i4_mean, |
| &u4_pred_variance, |
| u1_blk_size, |
| u1_blk_size, |
| u1_is_hbd, |
| e_chroma_plane); |
| } |
| |
| i4_noise_term = |
| ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_src_variance, u4_pred_variance); |
| |
| MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT); |
| |
| return i8_distortion; |
| } |
| else |
| { |
| return i8_distortion; |
| } |
| } |
| |
| UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size) |
| { |
| UWORD8 u1_num_noisy_children = 0; |
| UWORD8 u1_start_index = (u1_cu_x_pos / 8) + u1_cu_y_pos; |
| |
| if(8 == u1_cu_size) |
| { |
| return pu1_is_8x8Blk_noisy[u1_start_index]; |
| } |
| |
| u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos, u1_cu_size / 2); |
| |
| u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| pu1_is_8x8Blk_noisy, u1_cu_x_pos + (u1_cu_size / 2), u1_cu_y_pos, u1_cu_size / 2); |
| |
| u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos + (u1_cu_size / 2), u1_cu_size / 2); |
| |
| u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data( |
| pu1_is_8x8Blk_noisy, |
| u1_cu_x_pos + (u1_cu_size / 2), |
| u1_cu_y_pos + (u1_cu_size / 2), |
| u1_cu_size / 2); |
| |
| return (u1_num_noisy_children >= 2); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_psy_rd_cost_croma \endif |
| * |
| * \brief |
| * Calculates the psyco visual cost for RD opt. This is |
| * |
| * \param[in] pui4_source_satd |
| * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated. |
| * \param[in] *pui1_recon |
| * This si the pointer to the pred data. |
| * \param[in] recon_stride |
| * This si the pred stride |
| * \param[in] pic_type |
| * Picture type. |
| * \param[in] layer_id |
| * Indicates the temporal layer. |
| * \param[in] lambda |
| * This is the weighting factor for the cost. |
| * \param[in] is_hbd |
| * This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit. |
| * \param[in] sub_sampling_type |
| * This is the chroma subsampling type. 11 - for 420 and 13 for 422 |
| * \return |
| * the cost for the psyRDopt |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| LWORD64 ihevce_psy_rd_cost_croma( |
| LWORD64 *pui4_source_satd, |
| void *p_recon, |
| WORD32 recon_stride_vert, |
| WORD32 recond_stride_horz, |
| WORD32 cu_size_luma, |
| WORD32 pic_type, |
| WORD32 layer_id, |
| WORD32 lambda, |
| WORD32 start_index, |
| WORD32 is_hbd, |
| WORD32 sub_sampling_type, |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
| { |
| /* declare local variables to store the SATD values for the pred for the current block. */ |
| LWORD64 psy_rd_cost; |
| UWORD32 lambda_mod; |
| WORD32 psy_factor; |
| |
| /* declare local variables */ |
| WORD32 i; |
| WORD32 cu_total_size; |
| WORD32 num_comp_had_blocks; |
| |
| UWORD8 *pu1_l0_block; |
| UWORD8 *pu1_l0_block_prev; |
| UWORD8 *pu1_recon; |
| WORD32 ht_offset; |
| WORD32 wd_offset; |
| WORD32 cu_ht; |
| WORD32 cu_wd; |
| |
| WORD32 num_horz_blocks; |
| |
| WORD16 pi2_residue_had[64]; |
| /* this is used as a buffer with all values equal to 0. This is emulate the case with |
| pred being zero in HAD fucntion */ |
| UWORD8 ai1_zeros_buffer[64]; |
| |
| WORD32 had_block_size; |
| LWORD64 source_satd; // to hold source for current 8x8 block |
| LWORD64 recon_satd; // holds the current recon 8x8 satd |
| |
| WORD32 index_for_src_satd; |
| |
| (void)recond_stride_horz; |
| (void)pic_type; |
| (void)layer_id; |
| if(!is_hbd) |
| { |
| pu1_recon = (UWORD8 *)p_recon; |
| } |
| |
| /**** initialize the variables ****/ |
| had_block_size = 4; |
| |
| if(sub_sampling_type == 1) // 420 |
| { |
| cu_ht = cu_size_luma / 2; |
| cu_wd = cu_size_luma / 2; |
| } |
| else |
| { |
| cu_ht = cu_size_luma; |
| cu_wd = cu_size_luma / 2; |
| } |
| |
| num_horz_blocks = 2 * cu_wd / had_block_size; //ctb_width / had_block_size; |
| ht_offset = -had_block_size; |
| wd_offset = 0; //-had_block_size; |
| |
| cu_total_size = cu_ht * cu_wd; |
| num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size); |
| |
| index_for_src_satd = start_index; |
| |
| for(i = 0; i < 64; i++) |
| { |
| ai1_zeros_buffer[i] = 0; |
| } |
| |
| psy_factor = PSY_STRENGTH_CHROMA; |
| psy_rd_cost = 0; |
| lambda_mod = lambda * psy_factor; |
| |
| /************************************************************/ |
| /* loop over for every 4x4 blocks in the CU for Cb */ |
| for(i = 0; i < num_comp_had_blocks; i++) |
| { |
| if(i % num_horz_blocks == 0) |
| { |
| wd_offset = -had_block_size; |
| ht_offset += had_block_size; |
| } |
| wd_offset += had_block_size; |
| |
| /* source satd for the current 8x8 block */ |
| source_satd = pui4_source_satd[index_for_src_satd]; |
| |
| if(i % 2 != 0) |
| { |
| if(!is_hbd) |
| { |
| pu1_l0_block = pu1_l0_block_prev + 1; |
| } |
| } |
| else |
| { |
| if(!is_hbd) |
| { |
| /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ |
| pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset; |
| pu1_l0_block_prev = pu1_l0_block; |
| } |
| } |
| |
| if(had_block_size == 4) |
| { |
| if(!is_hbd) |
| { |
| recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit( |
| pu1_l0_block, |
| recon_stride_vert, |
| ai1_zeros_buffer, |
| had_block_size, |
| pi2_residue_had, |
| had_block_size); |
| } |
| |
| /* get the additional cost function based on the absolute SATD diff of source and recon. */ |
| psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd)); |
| |
| index_for_src_satd++; |
| |
| if((i % num_horz_blocks) == (num_horz_blocks - 1)) |
| { |
| index_for_src_satd -= num_horz_blocks; |
| index_for_src_satd += |
| (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */ |
| } |
| |
| } // if had block size ==4 |
| } // for loop for all 4x4 block in the cu |
| |
| psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT); |
| /* reutrn the additional cost for the psy RD opt */ |
| return (psy_rd_cost); |
| } |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_psy_rd_cost \endif |
| * |
| * \brief |
| * Calculates the psyco visual cost for RD opt. This is |
| * |
| * \param[in] pui4_source_satd |
| * This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated. |
| * \param[in] *pui1_recon |
| * This si the pointer to the pred data. |
| * \param[in] recon_stride |
| * This si the pred stride |
| * \param[in] pic_type |
| * Picture type. |
| * \param[in] layer_id |
| * Indicates the temporal layer. |
| * \param[in] lambda |
| * This is the weighting factor for the cost. |
| * |
| * \return |
| * the cost for the psyRDopt |
| * |
| * \author |
| * Ittiam |
| * |
| ***************************************************************************** |
| */ |
| LWORD64 ihevce_psy_rd_cost( |
| LWORD64 *pui4_source_satd, |
| void *pv_recon, |
| WORD32 recon_stride_vert, |
| WORD32 recond_stride_horz, |
| WORD32 cu_size, |
| WORD32 pic_type, |
| WORD32 layer_id, |
| WORD32 lambda, |
| WORD32 start_index, |
| WORD32 is_hbd, |
| UWORD32 u4_psy_strength, |
| ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
| { |
| /* declare local variables to store the SATD values for the pred for the current block. */ |
| LWORD64 psy_rd_cost; // TODO : check if overflow is there. |
| UWORD32 lambda_mod; |
| WORD32 psy_factor; |
| |
| /* declare local variables */ |
| WORD32 i; |
| WORD32 cu_total_size; |
| WORD32 num_comp_had_blocks; |
| |
| UWORD8 *pu1_l0_block; |
| UWORD8 *pu1_recon; |
| |
| WORD32 ht_offset; |
| WORD32 wd_offset; |
| WORD32 cu_ht; |
| WORD32 cu_wd; |
| |
| WORD32 num_horz_blocks; |
| |
| //WORD16 pi2_residue_had[64]; |
| WORD16 pi2_residue_had_zscan[64]; |
| //WORD16 pi2_residue[64]; |
| /* this is used as a buffer with all values equal to 0. This is emulate the case with |
| pred being zero in HAD fucntion */ |
| UWORD8 ai1_zeros_buffer[64]; |
| |
| WORD32 had_block_size; |
| LWORD64 source_satd; // to hold source for current 8x8 block |
| LWORD64 recon_satd; // holds the current recon 8x8 satd |
| |
| WORD32 index_for_src_satd; |
| |
| (void)recond_stride_horz; |
| (void)pic_type; |
| (void)layer_id; |
| /***** initialize the variables ****/ |
| had_block_size = 8; |
| cu_ht = cu_size; |
| cu_wd = cu_size; |
| |
| num_horz_blocks = cu_wd / had_block_size; //ctb_width / had_block_size; |
| |
| ht_offset = -had_block_size; |
| wd_offset = 0 - had_block_size; |
| |
| cu_total_size = cu_ht * cu_wd; |
| num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size); |
| |
| index_for_src_satd = start_index; |
| |
| for(i = 0; i < 64; i++) |
| { |
| ai1_zeros_buffer[i] = 0; |
| } |
| psy_factor = u4_psy_strength; //PSY_STRENGTH; |
| psy_rd_cost = 0; |
| lambda_mod = lambda * psy_factor; |
| |
| if(!is_hbd) |
| { |
| pu1_recon = (UWORD8 *)pv_recon; |
| } |
| |
| /**************************************************************/ |
| /* loop over for every 8x8 blocks in the CU */ |
| for(i = 0; i < num_comp_had_blocks; i++) |
| { |
| if(i % num_horz_blocks == 0) |
| { |
| wd_offset = -had_block_size; |
| ht_offset += had_block_size; |
| } |
| wd_offset += had_block_size; |
| |
| /* source satd for the current 8x8 block */ |
| source_satd = pui4_source_satd[index_for_src_satd]; |
| |
| if(had_block_size == 8) |
| { |
| //WORD32 index; |
| //WORD32 u4_satd; |
| //WORD32 dst_strd = 8; |
| //WORD32 i4_frm_qstep = 0; |
| //WORD32 early_cbf; |
| if(!is_hbd) |
| { |
| /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ |
| pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset; |
| |
| recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit( |
| pu1_l0_block, |
| recon_stride_vert, |
| ai1_zeros_buffer, |
| had_block_size, |
| pi2_residue_had_zscan, |
| had_block_size); |
| } |
| |
| /* get the additional cost function based on the absolute SATD diff of source and recon. */ |
| psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd)); |
| |
| index_for_src_satd++; |
| if((i % num_horz_blocks) == (num_horz_blocks - 1)) |
| { |
| index_for_src_satd -= num_horz_blocks; |
| index_for_src_satd += |
| (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */ |
| } |
| } // if |
| } // for loop |
| psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT); |
| |
| /* reutrn the additional cost for the psy RD opt */ |
| return (psy_rd_cost); |
| } |
| |
| unsigned long ihevce_calc_stim_injected_variance( |
| ULWORD64 *pu8_sigmaX, |
| ULWORD64 *pu8_sigmaXSquared, |
| ULWORD64 *u8_var, |
| WORD32 i4_inv_wpred_wt, |
| WORD32 i4_inv_wt_shift_val, |
| WORD32 i4_wpred_log_wdc, |
| WORD32 i4_part_id) |
| { |
| ULWORD64 u8_X_Square, u8_temp_var; |
| WORD32 i4_bits_req; |
| |
| const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
| |
| u8_X_Square = (pu8_sigmaX[i4_part_id] * pu8_sigmaX[i4_part_id]); |
| u8_temp_var = pu8_sigmaXSquared[i4_part_id] - u8_X_Square; |
| |
| if(i4_inv_wpred_wt != i4_default_src_wt) |
| { |
| i4_inv_wpred_wt = i4_inv_wpred_wt >> i4_inv_wt_shift_val; |
| |
| u8_temp_var = SHR_NEG( |
| (u8_temp_var * i4_inv_wpred_wt * i4_inv_wpred_wt), |
| (30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2)); |
| } |
| |
| GETRANGE64(i4_bits_req, u8_temp_var); |
| |
| if(i4_bits_req > 27) |
| { |
| *u8_var = u8_temp_var >> (i4_bits_req - 27); |
| return (i4_bits_req - 27); |
| } |
| else |
| { |
| *u8_var = u8_temp_var; |
| return 0; |
| } |
| } |
| |
| unsigned long ihevce_calc_variance_for_diff_weights( |
| ULWORD64 *pu8_sigmaX, |
| ULWORD64 *pu8_sigmaXSquared, |
| ULWORD64 *u8_var, |
| WORD32 *pi4_inv_wt, |
| WORD32 *pi4_inv_wt_shift_val, |
| pu_result_t *ps_result, |
| WORD32 i4_wpred_log_wdc, |
| PART_ID_T *pe_part_id, |
| UWORD8 u1_cu_size, |
| UWORD8 u1_num_parts, |
| UWORD8 u1_is_for_src) |
| { |
| WORD32 i4_k; |
| UWORD32 u4_wd, u4_ht; |
| UWORD8 u1_num_base_blks; |
| UWORD32 u4_num_pixels_in_part; |
| UWORD8 u1_index; |
| WORD32 i4_bits_req; |
| |
| UWORD8 u1_base_blk_size = 4; |
| UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size; |
| ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 }; |
| ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 }; |
| ULWORD64 u8_z; |
| |
| const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
| |
| for(i4_k = 0; i4_k < u1_num_parts; i4_k++) |
| { |
| u4_wd = ps_result[i4_k].pu.b4_wd + 1; |
| u4_ht = ps_result[i4_k].pu.b4_ht + 1; |
| u1_num_base_blks = u4_wd * u4_ht; |
| u4_num_pixels_in_part = u1_num_base_blks * u1_base_blk_size * u1_base_blk_size; |
| |
| if(u1_is_for_src) |
| { |
| u1_index = pe_part_id[i4_k]; |
| } |
| else |
| { |
| u1_index = i4_k; |
| } |
| |
| u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part; |
| u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index]; |
| |
| if(u1_is_for_src) |
| { |
| if(pi4_inv_wt[i4_k] != i4_default_src_wt) |
| { |
| pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k]; |
| u8_temp_sigmaX[i4_k] = SHR_NEG( |
| (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]), |
| (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc)); |
| u8_temp_sigmaXsquared[i4_k] = SHR_NEG( |
| (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]), |
| (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2)); |
| } |
| } |
| } |
| |
| u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) - |
| ((u8_temp_sigmaX[0] + u8_temp_sigmaX[1]) * (u8_temp_sigmaX[0] + u8_temp_sigmaX[1])); |
| |
| GETRANGE64(i4_bits_req, u8_z); |
| |
| if(i4_bits_req > 27) |
| { |
| *u8_var = u8_z >> (i4_bits_req - 27); |
| return (i4_bits_req - 27); |
| } |
| else |
| { |
| *u8_var = u8_z; |
| return 0; |
| } |
| } |